Пример #1
0
def test_dot_real(data_dict):
    """Benchmark the sparse vs. dense dot operator on a real dataset.

    The dataset file is looked up under ``<cwd>/data`` and fetched with
    ``get_data`` when it is missing. After printing a table header,
    ``_compare_sparse_dense`` is run in three variants (default,
    ``transpose=True`` and ``rsp=True``) over two sweeps:

    * every output dimension in ``data_dict['m']`` at the default batch size;
    * every batch size in ``data_dict['batch_size']`` at the default output
      dimension.

    :param data_dict: dataset description. Keys read here: 'data_name',
        'data_mini', 'url', 'data_origin_name', 'feature_dim', 'm',
        'batch_size', 'num_batches' and 'default_index' (a dict whose
        'output_dim' and 'batch_size' entries index into 'm' and
        'batch_size' respectively).
    """
    data_dir = os.path.join(os.getcwd(), 'data')
    data_name = data_dict['data_name']
    data_mini = data_dict['data_mini']

    path = os.path.join(data_dir, data_name)
    if not os.path.exists(path):
        get_data(data_dir, data_name, data_dict['url'], data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    batch_size_list = data_dict['batch_size']

    default_output_index = data_dict['default_index']['output_dim']
    default_batch_size_index = data_dict['default_index']['batch_size']
    density = estimate_density(path, k)
    num_batches = data_dict['num_batches']

    assert default_batch_size_index < len(batch_size_list)
    assert default_output_index < len(m)
    if ARGS.verbose:
        # The % formatting must be applied to the string, not to print()'s
        # return value.
        print("Running Benchmarking on %r data" % data_mini)

    header_fmt = '{:>15} {:>10} {:>10} {:>10} {:>20} {:>15} {:>15} {:>10} {:>10}'
    print(header_fmt.format('density(%)', 'n', 'm', 'k', 't_dense/t_sparse',
                            't_dense(ms)', 't_sparse(ms)', 'is_transpose', 'rhs_rsp'))

    # Each configuration is measured three ways, in this order.
    variants = ({}, {'transpose': True}, {'rsp': True})

    # Sweep 1: vary the output dimension, keep the default batch size.
    default_batch_size = batch_size_list[default_batch_size_index]
    for output_dim in m:
        for extra in variants:
            _compare_sparse_dense(data_dir, data_name, data_mini,
                                  k, output_dim, density,
                                  default_batch_size, num_batches, **extra)

    # Sweep 2: vary the batch size, keep the default output dimension.
    # The rsp variant now follows the sweep as well; previously it reused the
    # leftover ``output_dim`` and the default batch size from sweep 1.
    default_output_dim = m[default_output_index]
    for batch_size in batch_size_list:
        for extra in variants:
            _compare_sparse_dense(data_dir, data_name, data_mini,
                                  k, default_output_dim, density,
                                  batch_size, num_batches, **extra)
Пример #2
0
def compute_portvals(start_date, end_date, orders, startval):
    """Compute the daily total value of a portfolio from a table of orders.

    :param start_date: first date of the simulated range (fed to pd.date_range).
    :param end_date: last date of the range; also the upper bound of the label
        slice used when an order is applied to all following days.
    :param orders: DataFrame with 'Symbol', 'Order' and 'Shares' columns. It is
        iterated with iterrows() and each row's index value is used as the
        trade date. NOTE: it is modified in place (SELL share counts are
        negated and a 'Prices' column is added).
    :param startval: starting cash.
    :return: row-wise sum of (holdings * prices), i.e. one value per row of
        the frame returned by get_data.
    """
    # get the trading days using SPY as reference
    dates = pd.date_range(start_date, end_date)
    df = get_data(['SPY'], dates)
    # Make the sell orders a negative value
    # NOTE(review): chained indexing assignment; relies on the inner selection
    # being a view of `orders` - confirm against the pandas version in use.
    orders['Shares'][orders['Order'].str.upper()=='SELL'] = -orders['Shares'][orders['Order'].str.upper()=='SELL']

    # Create a data frame to hold a matrix of all the stocks
    # (one zero-initialised holdings column per traded symbol)
    symbols = np.unique(orders['Symbol'].values.ravel())  
    for stock in symbols: 
        df[stock]=0   
    
    # Get the prices for each day in the index
    # Front fill the prices where we have an NA, then backfill
    prices = get_data(symbols, df.index, False)
    prices = prices.fillna(method='ffill', axis=0)
    prices = prices.fillna(method='bfill', axis=0)

    # Add the starting value and a cash value
    # (cash is priced at 1 so it can be multiplied like any other holding)
    df['Cash'] = startval + 0.0
    prices['Cash'] = 1
    orders['Prices'] = 0
    for ind, row in orders.iterrows():
        # calculate leverage
        # leverage = (sum(longs) + sum(abs(shorts)) / ((sum(longs) - sum(abs(shorts)) + cash)
        # get temporary table after the transaction is made, and before the transaction is made
        # (columns from position 1 onwards, i.e. everything after the first column)
        df_chk, df_chk_b4 = df.ix[ind,1:], df.ix[ind,1:]
        df_chk [row['Symbol']] = df[row['Symbol']][ind] + row['Shares']
        df_chk ['Cash'] = df['Cash'][ind] - prices[row['Symbol']][ind] * row['Shares']
        df_chk        = prices.ix[ind] * df_chk
        df_chk_b4  = prices.ix[ind] * df_chk_b4
        # calculate the leverage after and before
        # NOTE: lev_after / lev_before are computed but never used, because the
        # leverage check below is commented out; every order is applied.
        lev_after = sum(abs(df_chk[:-1])) / sum(df_chk )
        lev_before = sum(abs(df_chk_b4[:-1])) / sum(df_chk_b4 )
        # print lev_after, lev_before, ind
        #if lev_after < 1000.0 or lev_after < lev_before :
        # Apply the order from its date through end_date: add the shares and
        # subtract their cost (at the order-date price) from cash.
        df[row['Symbol']][ind:end_date] = df[row['Symbol']][ind:end_date] + row['Shares']
        df['Cash'][ind:end_date] = df['Cash'][ind:end_date] - prices[row['Symbol']][ind] * row['Shares']
        #else:
        #    print "Cancel the order", ind, row['Symbol'], row['Shares'], "Lev before", lev_before , "Lev after",  lev_after
    
    # Drop the first column, then value each holding at that day's price.
    df = df.iloc[:,1:] * prices
    portvals = df.sum(axis=1)
    # print portvals
    return portvals   


#def test_run():
    # NOTE: with the `def` above commented out, the indented statements below
    # belong to compute_portvals and are unreachable (they follow the return).
    """Driver function."""
    # Define input parameters
    start_date = '2011-01-05'
    end_date = '2011-01-20'
    orders_file = os.path.join("orders", "orders-short.csv")
    start_val = 1000000
Пример #3
0
def test_run():
    """Train on IBM, trade the 2011 predictions and compare against $SPX.

    Steps, in order: load IBM prices for a 2008-2010 training range and a
    2011 test range, call ``train_model``, save a plot of the 'IBM' and
    'pred' columns to ``output/predicted_market.png``, call
    ``generate_orders``, compute portfolio values from ``orders/orders.csv``
    starting with 10000, print summary statistics for the fund and for a
    $SPX-only reference portfolio, and plot both normalised.

    All output uses single-argument ``print(...)`` calls so the function
    prints the same text on Python 2 and Python 3.
    """
    symbols = ['IBM']
    test_start, test_end = '2011-1-1', '2011-12-31'
    train_dates = pd.date_range('2008-1-1', '2010-12-31')
    test_dates = pd.date_range(test_start, test_end)
    training = get_data(symbols, train_dates)
    testing = get_data(symbols, test_dates)
    trainingIBM = training[symbols]
    testingIBM = testing[symbols]

    testing_result = train_model(trainingIBM, testingIBM)
    ax = testing_result[['IBM', 'pred']].plot(title='Predicted market')
    ax.set_xlabel('Date')
    ax.set_ylabel('Price')
    fig = ax.get_figure()
    fig.savefig("output/predicted_market.png")

    generate_orders(testing_result)

    orders_file = os.path.join("orders", "orders.csv")
    start_val = 10000
    # Process orders
    portvals = compute_portvals(test_start, test_end, orders_file, start_val)
    if isinstance(portvals, pd.DataFrame):
        # if a DataFrame is returned select the first column to get a Series
        portvals = portvals[portvals.columns[0]]

    # Get portfolio stats
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(portvals)
    # Simulate a $SPX-only reference portfolio to get stats
    prices_SPX = get_data(['$SPX'], test_dates)
    prices_SPX = prices_SPX[['$SPX']]  # remove SPY
    portvals_SPX = get_portfolio_value(prices_SPX, [1.0])
    cum_ret_SPX, avg_daily_ret_SPX, std_daily_ret_SPX, sharpe_ratio_SPX = get_portfolio_stats(portvals_SPX)

    print("Data Range: {} to {}".format(test_start, test_end))
    print("")
    print("Sharpe Ratio of Fund: {}".format(sharpe_ratio))
    print("Sharpe Ratio of $SPX: {}".format(sharpe_ratio_SPX))
    print("")
    print("Cumulative Return of Fund: {}".format(cum_ret))
    print("Cumulative Return of $SPX: {}".format(cum_ret_SPX))
    print("")
    print("Standard Deviation of Fund: {}".format(std_daily_ret))
    print("Standard Deviation of $SPX: {}".format(std_daily_ret_SPX))
    print("")
    print("Average Daily Return of Fund: {}".format(avg_daily_ret))
    print("Average Daily Return of $SPX: {}".format(avg_daily_ret_SPX))
    print("")
    # Explicit positional access: a bare [-1] on a date-indexed Series relies
    # on a deprecated positional fallback.
    print("Final Portfolio Value: {}".format(portvals.iloc[-1]))

    # Plot computed daily portfolio value
    df_temp = pd.concat([portvals, prices_SPX['$SPX']], keys=['Portfolio', '$SPX'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and $SPX")
Пример #4
0
def assess_portfolio(sd = dt.datetime(2008,1,1), ed = dt.datetime(2009,1,1), \
    syms = ['GOOG','AAPL','GLD','XOM'], \
    allocs=[0.1,0.2,0.3,0.4], \
    sv=1000000, rfr=0.0, sf=252.0, \
    gen_plot=False):
    """Return summary statistics for a fixed-allocation portfolio.

    :param sd: start of the date range.
    :param ed: end of the date range.
    :param syms: symbols held in the portfolio.
    :param allocs: allocations, passed to get_portfolio_value with the prices.
    :param sv: starting value; also used to scale the SPY comparison series.
    :param rfr: forwarded to get_portfolio_stats.
    :param sf: forwarded to get_portfolio_stats.
    :param gen_plot: when true, plot portfolio and SPY, both divided by sv.
    :return: (cr, adr, sddr, sr, ev) - the four values returned by
        get_portfolio_stats followed by the last portfolio value.
    """
    # Load prices; get_data also supplies an 'SPY' column.
    all_prices = get_data(syms, pd.date_range(sd, ed))
    portfolio_prices = all_prices[syms]

    # SPY rebased so that its first value equals sv.
    spy = all_prices['SPY']
    spy_scaled = (spy / spy.iloc[0]) * sv

    # Daily portfolio value and its statistics.
    port_val = get_portfolio_value(portfolio_prices, allocs, sv)
    cr, adr, sddr, sr = get_portfolio_stats(port_val, rfr, sf)

    if gen_plot:
        # Both series are divided by sv before plotting.
        comparison = pd.concat([port_val/sv, spy_scaled/sv],
                               keys=['Portfolio', 'SPY'], axis=1)
        plot_data(comparison,
                  title="Daily portfolio value and SPY",
                  ylabel="Normalized price")

    # End value is the last entry of the daily series.
    ev = port_val[-1]
    return cr, adr, sddr, sr, ev
Пример #5
0
def run_strategy():
    """Build Bollinger-band orders for IBM, save them and plot the signals.

    Writes the orders to ``bollinger_order.csv`` (index named 'Date'), plots
    the frame returned by calc_bollinger_bands and draws a green vertical
    line at every BUY and a red one at every SELL.
    """
    symbol = 'IBM'
    date_range = pd.date_range('2007-12-31', '2009-12-31')
    prices_IBM = get_data([symbol], date_range)

    bands = calc_bollinger_bands(prices_IBM[symbol], 20)
    orders = build_orders(bands)

    orders.index.name = 'Date'
    orders.to_csv("bollinger_order.csv")

    # Build the plot
    plt.style.use('ggplot')
    ax = bands.plot()

    # One vertical marker per order; other order types are not drawn.
    marker_color = {'BUY': 'g', 'SELL': 'r'}
    for when, order in orders.iterrows():
        color = marker_color.get(order['Order'])
        if color is not None:
            ax.axvline(x=when, color=color)

    plt.show()
Пример #6
0
def compute_insample_data_ML4T(start_date,end_date):
    """Build in-sample features for 'ML4T-399', run the learner and backtest.

    Computes three features (10-day rolling volatility of daily returns,
    9-day momentum, Bollinger value) plus the 5-day-ahead return, stacks them
    column-wise and passes the result to knn_learner. The predictions are
    plotted next to the prices, handed to my_strategy_ML4T, and finally the
    same data array is forwarded to compute_outsample_data_ML4T.

    :param start_date: start of the price range to load.
    :param end_date: end of the price range to load.
    :return: None. NOTE: both parameters are overwritten with hard-coded
        dates further down before the strategy calls.
    """
    dates=pd.date_range(start_date,end_date)
    prices=get_data(['ML4T-399'],dates,True)
    prices=prices.drop('SPY',axis=1)
    #calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:]/prices[:-1].values)-1.0
    daily_returns.ix[0,:]=0
    vol=pd.rolling_std(daily_returns,window=10)
    # drop the first 9 rows and the last 5 rows so every feature and the
    # 5-day-ahead target cover the same rows
    vol1=vol[9:-5]
    
    #calculating momentum
    # price relative to the price 9 rows earlier, minus 1
    momentum=prices.copy()
    momentum[9:] = (prices[9:]/prices[:-9].values)-1.0
    momentum1=momentum[9:-5]
    
    #calculating bollinger values
    # (price - 10-row rolling mean) / (2 * 10-row rolling std)
    sma = pd.rolling_mean(prices,window=10)
    sma1 = sma.dropna()
    std=pd.rolling_std(prices,window=10)
    std1=std[9:]
    bb_prices=prices[9:]
    bb_value=(bb_prices - sma1)/(2*std1)
    bb_value1=bb_value[0:-5]
    
    #shifting prices
    # shift(-5) aligns each row with the price 5 rows later
    shifted_prices=prices.shift(-5)
    # NOTE: this copy is overwritten below before it is read
    future_prices = prices.copy()
    prices1=prices[9:-5]
    future_return=(shifted_prices/prices) - 1.0
    future_return1=future_return[9:-5]
    
    vol_array=vol1.values
    momentum_array=momentum1.values
    bb_value_array=bb_value1.values

    # columns: volatility, momentum, bollinger value, future return
    data=np.concatenate((vol_array,momentum_array,bb_value_array,future_return1), axis=1)
    predY = knn_learner(data)
    predY_df_return=pd.DataFrame(predY,index=future_return1.index,columns=['predY'])
    # turn returns back into price levels: price * (1 + return)
    predY_df_price=prices1*(predY_df_return.values+1)
    future_prices=prices1*(future_return1.values+1)

    predY_df_price=predY_df_price.rename(columns={'ML4T-399':'Predicted Y'})
    ax=predY_df_price.plot(title="Sine Data Training Y/Price/Predicted Y[2008-2009]")
    
    future_prices = future_prices.rename(columns={'ML4T-399':'Training Y'})
    prices1 = prices1.rename(columns={'ML4T-399':'Price'})
    future_prices.plot(ax=ax)
    prices1.plot(ax=ax)
    plt.ylim((0,100))
    # hard-coded in-sample window; replaces the caller-supplied dates
    start_date='2008-01-15'
    end_date='2009-12-23'
    print
    print "Sine Data In Sample Statistics"
    print
    my_strategy_ML4T(prices1,predY_df_price,start_date,end_date,"ML4T_insample_orders","Sine Data In Sample Entries/Exits","Sine Data In Sample Backtest")

    # hard-coded out-of-sample window
    start_date='2010-01-01'
    end_date='2010-12-31'
    compute_outsample_data_ML4T(data,start_date,end_date)
Пример #7
0
def get_sequence_list_and_phyche_value_pseknc(input_data, extra_phyche_index=None):
    """Prepare the sequence list and physicochemical values for PseDNC/PseKNC.

    :param input_data: file type or handle, forwarded to get_data.
    :param extra_phyche_index: dict of user-defined physicochemical indices;
                               the key is the dinucleotide (string), the value
                               is its list of property values. ``None`` is
                               treated as an empty dict.
    :return: (sequence_list, phyche_value) - the result of get_data and the
             built-in table extended with ``extra_phyche_index``.
    """
    user_index = {} if extra_phyche_index is None else extra_phyche_index

    # Built-in table: six property values per dinucleotide.
    builtin_index = {
        'AA': [0.06, 0.5, 0.09, 1.59, 0.11, -0.11],
        'AC': [1.5, 0.5, 1.19, 0.13, 1.29, 1.04],
        'GT': [1.5, 0.5, 1.19, 0.13, 1.29, 1.04],
        'AG': [0.78, 0.36, -0.28, 0.68, -0.24, -0.62],
        'CC': [0.06, 1.08, -0.28, 0.56, -0.82, 0.24],
        'CA': [-1.38, -1.36, -1.01, -0.86, -0.62, -1.25],
        'CG': [-1.66, -1.22, -1.38, -0.82, -0.29, -1.39],
        'TT': [0.06, 0.5, 0.09, 1.59, 0.11, -0.11],
        'GG': [0.06, 1.08, -0.28, 0.56, -0.82, 0.24],
        'GC': [-0.08, 0.22, 2.3, -0.35, 0.65, 1.59],
        'AT': [1.07, 0.22, 0.83, -1.02, 2.51, 1.17],
        'GA': [-0.08, 0.5, 0.09, 0.13, -0.39, 0.71],
        'TG': [-1.38, -1.36, -1.01, -0.86, -0.62, -1.25],
        'TA': [-1.23, -2.37, -1.38, -2.24, -1.51, -1.39],
        'TC': [-0.08, 0.5, 0.09, 0.13, -0.39, 0.71],
        'CT': [0.78, 0.36, -0.28, 0.68, -0.24, -0.62],
    }

    sequences = get_data(input_data)
    combined_index = extend_phyche_index(builtin_index, user_index)
    return sequences, combined_index
Пример #8
0
def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \
    syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False):
    """Find allocations via optimize_allocs and report portfolio statistics.

    :param sd: start of the date range.
    :param ed: end of the date range.
    :param syms: symbols to allocate between.
    :param gen_plot: when true, plot portfolio value against SPY, both
        divided by their first row.
    :return: (allocs, cr, adr, sddr, sr) - the allocations followed by the
        four values returned by compute_portfolio_stats.
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Allocations from the optimiser using min_sharpe_fun as objective.
    allocs = optimize_allocs(prices, min_sharpe_fun)

    # Daily portfolio value is computed once and shared by the statistics and
    # the plot (it used to be recomputed with identical arguments).
    port_val = get_port_val(prices, allocs)
    cr, adr, sddr, sr = compute_portfolio_stats(port_val, allocs)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp = df_temp / df_temp.iloc[0]
        # .loc is the label-based replacement for .ix, which newer pandas no
        # longer provides.
        plot_stock_data(df_temp.loc[sd:ed, ['Portfolio', 'SPY']])

    return allocs, cr, adr, sddr, sr
Пример #9
0
def test_run():
    """Plot 2012 SPY prices with a 20-day rolling mean and Bollinger Bands."""
    window = 20

    # Load SPY for calendar year 2012.
    df = get_data(['SPY'], pd.date_range('2012-01-01', '2012-12-31'))
    spy = df['SPY']

    # Rolling statistics, then the bands derived from them.
    rm_SPY = get_rolling_mean(spy, window=window)
    rstd_SPY = get_rolling_std(spy, window=window)
    upper_band, lower_band = get_bollinger_bands(rm_SPY, rstd_SPY)

    # Price first, then the three overlays on the same axes.
    ax = spy.plot(title="Bollinger Bands", label='SPY')
    overlays = (
        (rm_SPY, 'Rolling mean'),
        (upper_band, 'upper band'),
        (lower_band, 'lower band'),
    )
    for series, label in overlays:
        series.plot(label=label, ax=ax)

    # Axis labels and legend.
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")
    ax.legend(loc='upper left')
    plt.show()
Пример #10
0
def ipseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False):
    """Run the complete iPseKNC process.

    ``k`` is the k-mer size, but the physicochemical index is always looked up
    for dinucleotides (``k=2``).

    :param input_data: forwarded to get_data together with ``alphabet``.
    :param k: int, the value of k-tuple.
    :param w: forwarded to make_pseknc_vector.
    :param lamada: forwarded to make_pseknc_vector.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: alphabet passed to every helper call.
    :param extra_index_file: a file path includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :return: the result of make_pseknc_vector called with ``theta_type=3``.
    """
    dinucleotide_k = 2
    phyche_list = get_phyche_list(k=dinucleotide_k, phyche_list=phyche_list,
                                  extra_index_file=extra_index_file,
                                  alphabet=alphabet, all_prop=all_prop)

    # Property values, with the user-defined index merged in when supplied.
    if extra_index_file is None:
        phyche_vals = get_phyche_value(k=dinucleotide_k, phyche_list=phyche_list,
                                       alphabet=alphabet)
    else:
        user_index = get_extra_index(extra_index_file)
        from util import normalize_index

        normalized_index = normalize_index(user_index, alphabet, is_convert_dict=True)
        phyche_vals = get_phyche_value(k=dinucleotide_k, phyche_list=phyche_list,
                                       alphabet=alphabet,
                                       extra_phyche_index=normalized_index)

    sequences = get_data(input_data, alphabet)
    return make_pseknc_vector(sequences, phyche_vals, k, w, lamada, alphabet, theta_type=3)
Пример #11
0
def main():
    """Plot SPY prices and a histogram of its daily returns.

    Vertical dashed lines mark the mean (white) and +/- one standard
    deviation (red) of the SPY daily returns. The kurtosis of the daily
    returns is printed at the end.
    """
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']
    df = get_data(symbols, dates)
    plot_data(df)

    daily_returns = compute_daily_returns(df)

    # histogram
    daily_returns.hist(bins=20)

    # To draw two or more histograms on the same chart, call hist once per
    # column instead:
    #   daily_returns['SPY'].hist(bins=20, label="SPY")
    #   daily_returns['XOM'].hist(bins=20, label="XOM")

    mean = daily_returns['SPY'].mean()
    std = daily_returns['SPY'].std()

    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(daily_returns.kurtosis())
Пример #12
0
def generate_orders(start_date, end_date, symbols):
    """Derive band-based entries/exits, save them as orders and plot them.

    The four entry/exit lists returned by calculate_entries are concatenated,
    sorted by their first element and written to ``orders/orders.csv`` with
    'Date' as index. The bands are plotted with one vertical line per entry
    (green: long entry, red: short entry, black: either exit) and the figure
    is saved to ``output/entries.png``.
    """
    date_range = pd.date_range(start_date, end_date)
    prices = get_data(symbols, date_range)[symbols]
    prices_bands = rollinger_bands(prices)
    long_entry, long_exit, short_entry, short_exit = calculate_entries(start_date, prices_bands)

    # save to orders.csv
    all_entries = long_entry + long_exit + short_entry + short_exit
    all_entries = sorted(all_entries, key=lambda item: item[0])
    orders = pd.DataFrame(all_entries, columns=['Date', 'Symbol', 'Order', 'Shares'], index=None)
    orders = orders.set_index('Date')
    orders.to_csv("orders/orders.csv")

    # plot orders
    ax = prices_bands.plot(title="Bollinger Bands")
    ax.set_xlabel("Date")
    ax.set_ylabel("Price")

    # Drawn group by group, in the same order as before.
    marker_groups = (
        (long_entry, 'green'),
        (long_exit, 'black'),
        (short_entry, 'red'),
        (short_exit, 'black'),
    )
    for entries, color in marker_groups:
        for entry in entries:
            ax.axvline(entry[0], c=color)

    # plt.show()
    ax.get_figure().savefig("output/entries.png")
Пример #13
0
def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \
    syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False):
    """Optimise allocations with SLSQP and report portfolio statistics.

    Minimises ``f`` over the allocation vector, with every allocation bounded
    to [0, 1] and the allocations constrained to sum to 1.

    :param sd: start of the date range.
    :param ed: end of the date range.
    :param syms: symbols to allocate between.
    :param gen_plot: when true, plot portfolio value against SPY, both
        divided by their first row.
    :return: (allocs, cr, adr, sddr, sr) - the optimiser's solution followed
        by the statistics returned by calc_portfolio_stats.
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates, sd, ed)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # find the allocations for the optimal portfolio
    sv = 1000000
    # First row by position; .iloc replaces .ix, which newer pandas no longer
    # provides.
    normalized_prices = prices / prices.iloc[0, :]
    # NOTE(review): the initial guess has five entries while the default
    # `syms` has four - confirm that `f` expects this length.
    x0 = np.array([0.2, 0.2, 0.3, 0.3, 0.0])
    optimal_allocs = spo.minimize(f, x0, args=(normalized_prices, sv),
                        method='SLSQP', options={'disp': True},
                        bounds=tuple((0, 1) for i in range(0, x0.size)),
                        constraints = ({'type': 'eq', 'fun': \
                        lambda inputs: 1.0 - np.sum(inputs)})
                        )
    allocs = optimal_allocs.x
    port_val, cr, adr, sddr, sr = calc_portfolio_stats(allocs, normalized_prices, sv)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp = df_temp / df_temp.iloc[0, :]
        df_temp.plot()
        plt.show()

    return allocs, cr, adr, sddr, sr
Пример #14
0
def test_run():
    """Plot SPY prices, daily returns and their histogram, and print stats.

    Prints the mean and standard deviation of the SPY daily returns, marks
    them on the histogram (mean in white, +/- std in red) and finally prints
    the kurtosis of the daily returns.

    Output uses single-argument ``print(...)`` calls with %-formatting so the
    text is the same on Python 2 and Python 3.
    """
    # Read data
    dates = pd.date_range('2009-01-01', '2012-12-31')
    symbols = ['SPY']

    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    plot_data(daily_returns, title="Daily returns", ylabel="Daily returns")

    # Plot a histogram
    daily_returns.hist(bins=20)  # changing # of bins to 20

    # Get mean and standard deviation
    mean = daily_returns['SPY'].mean()
    print("mean = %s" % (mean,))

    std = daily_returns['SPY'].std()
    print("std = %s" % (std,))

    plt.axvline(mean, color='w', linestyle='dashed', linewidth=2)
    plt.axvline(std, color='r', linestyle='dashed', linewidth=2)
    plt.axvline(-std, color='r', linestyle='dashed', linewidth=2)
    plt.show()

    # Compute kurtosis
    print(daily_returns.kurtosis())
Пример #15
0
def get_vals(start_date, end_date, symbol):
    """Return the ``symbol`` selection of the prices loaded for a date range.

    Prices are loaded with util.get_data for every date between
    ``start_date`` and ``end_date`` and then indexed with ``symbol``.
    """
    date_range = pd.date_range(start_date, end_date)
    loaded = util.get_data(symbol, date_range)  # automatically adds SPY
    return loaded[symbol]
Пример #16
0
def define_y(symbol, startdate_string ='12/31/07', enddate_string ='12/31/09', window=5):
    """Build the 5-row-ahead return target for ``symbol``.

    :param symbol: STRING
    :param startdate_string: STRING 'MM/DD/YY'
    :param enddate_string: STRING 'MM/DD/YY'
    :param window: kept for backward compatibility; it only fed a rolling
        mean whose result was never used, so it no longer has any effect.
        The look-ahead is fixed at 5 rows.
    :return: (y, y_np, prices) - ``y`` is the DataFrame
        ``prices.shift(-5) / prices - 1``, ``y_np`` is the same data as a
        transposed NumPy array, and ``prices`` is the single-column price
        DataFrame for ``symbol``.
    """
    start_date = pd.to_datetime(startdate_string)
    end_date = pd.to_datetime(enddate_string)
    dates = pd.date_range(start_date, end_date)  # all dates, including non-trading days

    symbols = [symbol, '$SPX']

    # Read in adjusted closing prices for given symbols, date range
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[[symbol]]  # double brackets keep a one-column DataFrame

    # Return over the next 5 rows; the last 5 rows have no future price and
    # therefore come out as NaN.
    y = (prices.shift(-5) / prices) - 1
    # .values replaces DataFrame.as_matrix(), which newer pandas no longer
    # provides; transposed so the single output column becomes one row.
    y_np = y.values.transpose()

    return y, y_np, prices
Пример #17
0
def get_indicators(start_date, end_date, symbols):
    """Compute discretised indicators and a combined state for one symbol.

    Only ``symbols[1]`` is analysed. Three indicators are computed over a
    20-row window and each is cut into 10 equal-width bins:

    * x1: (price - rolling mean) / (2 * rolling std)
    * x2: 20-row percentage change
    * x3: rolling std of the 1-row percentage change

    A random ``holding`` in {0, 1, 2} is drawn for every row and combined with
    the bins into ``s = 1000*holding + 100*x3 + 10*x2 + x1``.

    :param start_date: start of the date range.
    :param end_date: end of the date range.
    :param symbols: list of symbols to load; must have at least two entries.
    :return: (tempdf, prices) - the indicator frame (rows with NaN dropped)
        and the price frame for ``symbols``.
    """
    window = 20

    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols

    sym = symbols[1]
    price = prices[sym]

    # Series.rolling replaces the module-level pd.rolling_mean / rolling_std
    # helpers, which newer pandas no longer provides.
    x1 = (price - price.rolling(window).mean()) / (2 * price.rolling(window).std())
    x1_dis = pd.cut(x1, 10, labels=False)

    x2 = price.pct_change(window)
    x2_dis = pd.cut(x2, 10, labels=False)

    x3 = price.pct_change(1).rolling(window).std()
    x3_dis = pd.cut(x3, 10, labels=False)

    tempdf = pd.concat([x1_dis, x2_dis, x3_dis], axis=1).dropna()
    tempdf.columns = ["x1", "x2", "x3"]

    # Single-argument print(...) prints the same on Python 2 and Python 3.
    print(tempdf.dtypes)

    tempdf["holding"] = np.random.randint(0, 3, size=len(tempdf))
    # 0 = no position, 1 = negative position, 2 = holding long
    tempdf["s"] = 1000 * tempdf["holding"] + 100 * tempdf["x3"] + 10 * tempdf["x2"] + 1 * tempdf["x1"]
    print(tempdf.head(50))
    return tempdf, prices
Пример #18
0
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Parameters
    ----------
        start_date: first date to track
        end_date: last date to track
        orders_file: CSV file to read orders from
        start_val: total starting cash available

    Returns
    -------
        portvals: portfolio value for each trading day from start_date to end_date (inclusive)
    """
    date_range = pd.date_range(start_date, end_date)
    order_book = construct_orders(orders_file, date_range)
    tickers = list(set(order_book.Symbol))

    stock_prices = get_data(tickers, date_range)[tickers]
    daily_trades = calculate_trades(stock_prices.index, order_book, tickers)

    # Cash: starting value minus the running cost of all trades so far.
    trade_cost = (stock_prices * daily_trades).sum(axis=1)
    ledger = pd.DataFrame(index=stock_prices.index)
    ledger['cash'] = start_val + (-1.0 * trade_cost).cumsum()
    # Stock: cumulative position in each symbol valued at that day's price.
    ledger['stock'] = (stock_prices * daily_trades.cumsum()).sum(axis=1)

    return ledger.cash + ledger.stock
Пример #19
0
def test_run():
    """Scatter XOM and GLD daily returns against SPY and fit a line to each.

    For each of XOM and GLD: draw the scatter plot against SPY, fit a
    degree-1 polynomial (slope = beta, intercept = alpha), print both
    coefficients, overlay the fitted line in red and show the figure.
    Finally print the Pearson correlation matrix of the daily returns.

    Output uses single-argument ``print(...)`` calls with %-formatting so the
    text is the same on Python 2 and Python 3.
    """
    # Read data
    dates = pd.date_range('2009-01-01', '2012-12-31')  # 2009 through 2012
    symbols = ['SPY','XOM','GLD']
    df = get_data(symbols, dates)
    plot_data(df)

    # Compute daily returns
    daily_returns = compute_daily_returns(df)
    market = daily_returns['SPY']

    # Same analysis for each symbol, XOM first and then GLD.
    for symbol in ('XOM', 'GLD'):
        daily_returns.plot(kind='scatter', x='SPY', y=symbol)
        beta, alpha = np.polyfit(market, daily_returns[symbol], 1)
        # Two spaces after '=' reproduce the label's trailing space plus the
        # separator the old print statement inserted.
        print("beta_%s=  %s" % (symbol, beta))
        print("alpha_%s=  %s" % (symbol, alpha))
        plt.plot(market, beta * market + alpha, '-', color='r')
        plt.grid()
        plt.show()

    # Calculate correlation coefficient
    print(daily_returns.corr(method='pearson'))
def optimize_portfolio(sd=dt.datetime(2008,1,1), ed=dt.datetime(2009,1,1), \
    syms=['GOOG','AAPL','GLD','XOM'], gen_plot=False):
    """Optimise allocations with SLSQP and compute portfolio statistics.

    Minimises the negative of sharp_ratio(prices.values, x) starting from a
    random allocation vector, with each allocation bounded below by 0 and the
    allocations constrained to sum to 1.

    :param sd: start of the date range.
    :param ed: end of the date range.
    :param syms: symbols to allocate between.
    :param gen_plot: when true, plot normalised portfolio value against SPY.
    :return: (allocs, cr, adr, sddr, sr); the statistics are derived from the
        normalised, allocation-weighted prices computed below.
    """

    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # find the allocations for the optimal portfolio
    # note that the values here ARE NOT meant to be correct for a test case
     # add code here to find the allocations
    
    # random starting point, scaled so the allocations sum to 1
    x0 = np.random.random(len(syms))
    x0 /= x0.sum()
    # x0=np.asarray([0.2, 0.2, 0.3, 0.3, 0.0])
    # negated because minimize() is used to maximise the ratio
    fun = lambda x: -sharp_ratio(prices.values,x)
    cons = ({ 'type': 'eq', 'fun': lambda inputs: 1 - np.sum(inputs) })
    bnds = tuple((0,None) for i in range(len(syms)))
    res = minimize(fun, x0 , method='SLSQP', bounds=bnds, constraints=cons)

    allocs = res.x

    # placeholder values; all four are recomputed further down
    cr, adr, sddr, sr = [0.25, 0.001, 0.0005, 2.1] # add code here to compute stats

    # NOTE(review): `.values` may be a view of the underlying frame, in which
    # case the in-place divisions below also change prices_SPY / prices -
    # confirm before reusing those frames.
    priceSPY=prices_SPY.values
    priceSPY /= priceSPY[0]

    # normalise each column by its first row, then weight by allocation
    price_stocks = prices.values
    price_stocks /= price_stocks[0]
    price_stocks *= allocs

    port_val = pd.DataFrame(price_stocks.sum(axis=1),index=prices.index)
    prices_SPY = pd.DataFrame(priceSPY,index=prices.index)



    # Get daily portfolio value
    # port_val = prices_SPY # add code here to compute daily portfolio values

    # cumulative return: last normalised value minus 1
    cr = port_val.values[-1] -1
    dr = port_val.values
    # previous-row values, with the first row repeated so that the first
    # daily return comes out as 0
    drShift = np.vstack([dr[0],dr[0:(len(dr)-1)]])
    dr = dr/drShift -1

    adr = dr.mean()
    sddr=dr.std()

    # scale factor for the ratio: square root of 252
    k = math.sqrt(252)

    sr = k*np.mean(dr)/np.std(dr)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        # add code to plot here
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        # keep only the outer level ('Portfolio', 'SPY') as column names
        df_temp.columns=df_temp.columns.get_level_values(0)
        plot_data(df_temp, "Daily portfolio value and SPY", "Date", "Normalized prices")

    return allocs, cr, adr, sddr, sr
def optimize_portfolio(start_date, end_date, symbols):
    """Simulate and optimize portfolio allocations.

    Finds allocations with find_optimal_allocations, rescales them to sum to
    1, prints the resulting portfolio statistics and plots the portfolio
    value against SPY divided by its first value.

    Output uses single-argument ``print(...)`` calls with %-formatting so the
    text is the same on Python 2 and Python 3.

    :param start_date: start of the date range.
    :param end_date: end of the date range.
    :param symbols: symbols to allocate between.
    :return: None.
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Get optimal allocations
    allocs = find_optimal_allocations(prices)
    allocs = allocs / np.sum(allocs)  # normalize allocations, if they don't sum to 1.0

    # Get daily portfolio value (already normalized since we use default start_val=1.0)
    port_val = get_portfolio_value(prices, allocs)

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

    # Print statistics
    report = (
        ("Start Date:", start_date),
        ("End Date:", end_date),
        ("Symbols:", symbols),
        ("Optimal allocations:", allocs),
        ("Sharpe Ratio:", sharpe_ratio),
        ("Volatility (stdev of daily returns):", std_daily_ret),
        ("Average Daily Return:", avg_daily_ret),
        ("Cumulative Return:", cum_ret),
    )
    for label, value in report:
        print("%s %s" % (label, value))

    # Compare daily portfolio value with normalized SPY.
    # prices_SPY is a single column, so its first value is taken by position;
    # the previous two-dimensional .ix[0, :] indexer does not fit a Series and
    # .ix is no longer provided by newer pandas.
    normed_SPY = prices_SPY / prices_SPY.iloc[0]
    df_temp = pd.concat([port_val, normed_SPY], keys=['Portfolio', 'SPY'], axis=1)
    plot_data(df_temp, title="Daily Portfolio Value and SPY")
Пример #22
0
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Orders that would push leverage above 2.0 are skipped.

    Parameters
    ----------
        start_date: first date to track
        end_date: last date to track
        orders_file: CSV file to read orders from
        start_val: total starting cash available

    Returns
    -------
        portvals: portfolio value for each trading day from start_date to end_date (inclusive)
    """
    # TODO: Your code here
    #create df_prices
    # orders are read with 'Date' as a parsed datetime index
    df_temp = pd.read_csv(orders_file, index_col='Date', parse_dates=True)
    # collect the distinct symbols that appear in the orders
    symbols = []
    for index,row in df_temp.iterrows():		
    	symbols.append(row['Symbol'])
    symbols = list(set(symbols))
    dates = pd.date_range(start_date, end_date)
    df_prices = get_data(symbols, dates)
    df_prices = df_prices.drop('SPY',1)
    # cash is priced at 1.0 so it can be valued like any other holding
    df_prices['CASH'] = 1.0
    #print df_prices
    
    #Create df_trade.
    #Check for leverage by create a curr_list that save the cumulative holding.
    #When a new order comes, create a temp_list with update holding and multiply it with current prices, 
    #then check to see if leverage exceeds 2 or not. If it's not, then process the order,
    #change curr_list to temp_list and update df_trade
    #If it exceeds 2, then don't process the order and do nothing 
    # same shape as the price table, with every non-zero entry reset to 0
    df_trade = df_prices.copy()
    df_trade[df_trade != 0] = 0
    # NOTE(review): assumes start_date is a label present in the index - confirm
    df_trade.ix[start_date,'CASH'] = start_val
    curr_list = df_trade.ix[start_date].copy()
    for index, row in df_temp.iterrows():
    	# tentative holdings after this order: BUY adds shares and spends
    	# cash, anything else removes shares and adds cash
    	temp_list = curr_list.copy()
    	temp_list.ix[row['Symbol']] += (1 if row['Order'] == 'BUY' else -1)*float(row['Shares'])
    	temp_list.ix['CASH'] += (-1 if row['Order'] == 'BUY' else 1)*float(row['Shares'])*df_prices.ix[index,row['Symbol']]
    	# leverage = (sum of |position values| excluding cash) / (net value including cash)
    	sum_abs_all  = abs(temp_list).dot(df_prices.ix[index])
    	sum_cash = abs(temp_list['CASH'])
    	sum_all = temp_list.dot(df_prices.ix[index])
    	leverage = (sum_abs_all-sum_cash)/sum_all
    	#print df_prices.ix[index,row['Symbol']], sum_abs_all , sum_cash, sum_all, leverage
    	if (leverage <= 2.0):
    		# accept the order: commit the holdings and record the trade
    		curr_list = temp_list.copy()
    		df_trade.ix[index,row['Symbol']] += (1 if row['Order'] == 'BUY' else -1)*float(row['Shares'])
    		df_trade.ix[index,'CASH'] += (-1 if row['Order'] == 'BUY' else 1)*float(row['Shares'])*df_prices.ix[index,row['Symbol']]
    #print df_trade
    
    #calculate holding from df_trade and portvals
    # running sum turns per-day trades into holdings, in place, row by row
    portvals = pd.Series(index = df_prices.index)
    portvals.ix[0] = df_prices.ix[0].dot(df_trade.ix[0])
    for i in range(1,df_trade.shape[0]):
    	df_trade.ix[i] += df_trade.ix[i-1]
    	portvals.ix[i] = df_prices.ix[i].dot(df_trade.ix[i])
    print portvals
    return portvals
Пример #23
0
def pseknc(input_data, k, w, lamada, phyche_list, alphabet, extra_index_file=None, all_prop=False, theta_type=1):
    """Run the complete PseKNC pipeline and return its feature vectors.

    :param input_data: sequence source, forwarded unchanged to ``get_data``.
    :param k: int, the value of k-tuple.
    :param w: forwarded unchanged to ``make_pseknc_vector``.
    :param lamada: forwarded unchanged to ``make_pseknc_vector``.
    :param phyche_list: list, the input physicochemical properties list.
    :param alphabet: one of ``index_list.DNA``, ``index_list.RNA`` or
        ``index_list.PROTEIN``.
    :param extra_index_file: a file path that includes the user-defined phyche_index.
    :param all_prop: bool, choose all physicochemical properties or not.
    :param theta_type: forwarded unchanged to ``make_pseknc_vector``.
    :return: the result of ``make_pseknc_vector``.
    :raises ValueError: if ``alphabet`` is none of the supported alphabets.
    """
    phyche_list = get_phyche_list(k, phyche_list,
                                  extra_index_file=extra_index_file, alphabet=alphabet, all_prop=all_prop)

    # Get phyche_vals.
    if alphabet == index_list.DNA or alphabet == index_list.RNA:
        if extra_index_file is not None:
            extra_phyche_index = get_extra_index(extra_index_file)
            from util import normalize_index

            phyche_vals = get_phyche_value(k, phyche_list, alphabet,
                                           normalize_index(extra_phyche_index, alphabet, is_convert_dict=True))
        else:
            phyche_vals = get_phyche_value(k, phyche_list, alphabet)
    elif alphabet == index_list.PROTEIN:
        phyche_vals = get_aaindex(phyche_list)
        if extra_index_file is not None:
            phyche_vals.extend(extend_aaindex(extra_index_file))
    else:
        # Without this branch phyche_vals stays unbound and the final call
        # fails with an UnboundLocalError that hides the real cause.
        raise ValueError("Unsupported alphabet: {!r}".format(alphabet))

    seq_list = get_data(input_data, alphabet)

    return make_pseknc_vector(seq_list, phyche_vals, k, w, lamada, alphabet, theta_type)
Пример #24
0
def assess_portfolio(start_date, end_date, symbols, allocs, start_val=1):
    """Simulate and assess the performance of a stock portfolio.

    Loads prices for ``symbols`` over the date range, computes the daily
    portfolio value, plots it, prints the summary statistics and finally
    plots the portfolio against SPY on a normalized scale.

    Parameters
    ----------
        start_date: first date of the range passed to ``pd.date_range``
        end_date: last date of the range passed to ``pd.date_range``
        symbols: list of column labels selected from the loaded prices
        allocs: allocations forwarded to ``get_portfolio_value``
        start_val: starting value forwarded to ``get_portfolio_value``
    """
    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Get daily portfolio value
    port_val = get_portfolio_value(prices, allocs, start_val)
    plot_data(port_val, title="Daily Portfolio Value")

    # Get portfolio statistics (note: std_daily_ret = volatility)
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(port_val)

    # Print statistics. str.format keeps the output identical on Python 2 and
    # Python 3, whereas the old print statement only parses on Python 2.
    report = (
        ("Start Date", start_date),
        ("End Date", end_date),
        ("Symbols", symbols),
        ("Allocations", allocs),
        ("Sharpe Ratio", sharpe_ratio),
        ("Volatility (stdev of daily returns)", std_daily_ret),
        ("Average Daily Return", avg_daily_ret),
        ("Cumulative Return", cum_ret),
    )
    for label, value in report:
        print("{}: {}".format(label, value))

    # Compare daily portfolio value with SPY using a normalized plot
    df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and SPY")
def assess_portfolio(sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1),
                     syms=None,
                     allocs=None,
                     sv=1000000, rfr=0.0, sf=252.0,
                     gen_plot=False):
    """Compute summary statistics for a fixed-allocation portfolio.

    Parameters
    ----------
        sd: start of the date range
        ed: end of the date range
        syms: list of symbols; defaults to ['GOOG', 'AAPL', 'GLD', 'XOM']
        allocs: one allocation per symbol; defaults to [0.1, 0.2, 0.3, 0.4]
        sv: starting portfolio value, used only to scale the end value
        rfr: risk-free rate subtracted from every daily return
        sf: sampling frequency; its square root scales the Sharpe ratio
        gen_plot: when true, plot the portfolio against SPY via ``plot_data``

    Returns
    -------
        (cr, adr, sddr, sr, ev): cumulative return, mean daily return,
        standard deviation of daily returns, Sharpe ratio and end value.
        ``cr`` and ``ev`` are one-element arrays because the portfolio is
        held in a single-column DataFrame.
    """
    # None sentinels instead of list defaults, so that a callee that mutates
    # its argument cannot corrupt the defaults for later calls.
    syms = ['GOOG', 'AAPL', 'GLD', 'XOM'] if syms is None else list(syms)
    allocs = [0.1, 0.2, 0.3, 0.4] if allocs is None else allocs

    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(list(syms), dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all['SPY']  # only SPY, for comparison later

    # Normalize into NEW arrays. The previous in-place `/=` on `.values`
    # wrote through to the underlying frame and fails on read-only views.
    spy_values = prices_SPY.values
    normed_spy = spy_values / spy_values[0]

    stock_values = prices.values
    weighted_stocks = stock_values / stock_values[0] * allocs

    # Daily portfolio value, relative to a starting value of 1
    port_val = pd.DataFrame(weighted_stocks.sum(axis=1), index=prices.index)
    prices_SPY = pd.DataFrame(normed_spy, index=prices.index)

    # Portfolio statistics (note: sddr = volatility)
    cr = port_val.values[-1] - 1

    dr = port_val.values
    # Shift by one row, repeating the first row so the first return is 0.
    # NOTE(review): that artificial 0 is included in mean/std below - confirm
    # this is intended before relying on adr/sddr/sr.
    dr_shifted = np.vstack([dr[0], dr[:-1]])
    dr = dr / dr_shifted - 1

    adr = dr.mean()
    sddr = dr.std()
    sr = math.sqrt(sf) * np.mean(dr - rfr) / np.std(dr - rfr)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY], keys=['Portfolio', 'SPY'], axis=1)
        df_temp.columns = df_temp.columns.get_level_values(0)
        plot_data(df_temp, "Daily portfolio value and SPY", "Date", "Normalized prices")

    # End value: starting value scaled by the final normalized portfolio value
    ev = sv * port_val.values[-1]

    return cr, adr, sddr, sr, ev
Пример #26
0
def compute_insample_data_IBM(start_date, end_date):
    """Build in-sample IBM features, fit the KNN learner and chart the result.

    Loads IBM prices for the given range, derives three features
    (10-row rolling volatility of daily returns, 9-row momentum and a
    Bollinger-style value) plus the 5-row-ahead return, feeds them to
    ``knn_learner`` and plots predicted price, future price and price.
    It then hands the series to ``my_strategy_IBM`` and starts the
    out-of-sample run.

    Parameters
    ----------
        start_date: first date of the in-sample price range
        end_date: last date of the in-sample price range
    """
    dates = pd.date_range(start_date, end_date)
    prices = get_data(["IBM"], dates, True)
    prices = prices.drop("SPY", axis=1)

    # calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:] / prices[:-1].values) - 1.0
    daily_returns.iloc[0, :] = 0  # no previous row for the first return
    # DataFrame.rolling replaces the removed pd.rolling_std / pd.rolling_mean
    vol = daily_returns.rolling(window=10).std()
    vol1 = vol[9:-5]

    # calculating momentum
    momentum = prices.copy()
    momentum[9:] = (prices[9:] / prices[:-9].values) - 1.0
    momentum1 = momentum[9:-5]

    # calculating bollinger values
    sma = prices.rolling(window=10).mean()
    sma1 = sma.dropna()
    std = prices.rolling(window=10).std()
    std1 = std[9:]
    bb_prices = prices[9:]
    bb_value = (bb_prices - sma1) / (2 * std1)
    bb_value1 = bb_value[0:-5]

    # shifting prices: return realised 5 rows after each row
    shifted_prices = prices.shift(-5)
    prices1 = prices[9:-5]
    future_return = (shifted_prices / prices) - 1.0
    future_return1 = future_return[9:-5]

    # Every slice above drops the first 9 and last 5 rows so that the
    # feature columns and the target line up row for row.
    data = np.concatenate(
        (vol1.values, momentum1.values, bb_value1.values, future_return1), axis=1)
    predY = knn_learner(data)
    predY_df_return = pd.DataFrame(predY, index=future_return1.index, columns=["predY"])
    predY_df_price = prices1 * (predY_df_return.values + 1)
    future_prices = prices1 * (future_return1.values + 1)

    predY_df_price = predY_df_price.rename(columns={"IBM": "Predicted Y"})
    ax = predY_df_price.plot(title="Training Y/Price/Predicted Y: 2008-2009")

    future_prices = future_prices.rename(columns={"IBM": "Training Y"})
    prices1 = prices1.rename(columns={"IBM": "Price"})
    future_prices.plot(ax=ax)
    prices1.plot(ax=ax)

    # The strategy window is hard-coded and overrides the arguments.
    start_date = "2008-01-15"
    end_date = "2009-12-23"
    my_strategy_IBM(prices1, predY_df_price, start_date, end_date, "IBM_insample_orders", "Strategy 2008-2009")

    # NOTE(review): the function visible in this file is named
    # compute_outsample_data_IBM - confirm compute_outsample_data exists.
    start_date = "2010-01-01"
    end_date = "2010-12-31"
    compute_outsample_data(data, start_date, end_date)
Пример #27
0
def test_run():
    """Load 2010 prices for three symbols and plot a one-month selection."""
    symbols = ['GOOG', 'IBM', 'GLD']
    dates = pd.date_range('2010-01-01', '2010-12-31')

    df1 = get_data(symbols, dates)

    # Show only three columns, restricted to March 2010.
    columns_to_plot = ['SPY', 'IBM', 'GOOG']
    plot_selected(df1, columns_to_plot, '2010-03-01', '2010-04-01')
Пример #28
0
def compute_outsample_data_IBM(traindata, start_date, end_date):
    """Build out-of-sample IBM features and evaluate the trained KNN learner.

    Derives the same three features as the in-sample step (10-row rolling
    volatility, 9-row momentum, Bollinger-style value) plus the 5-row-ahead
    return, asks ``knn_learner_test`` for predictions and passes the
    resulting price series to ``my_strategy_IBM``.

    Parameters
    ----------
        traindata: training matrix forwarded to ``knn_learner_test``
        start_date: first date of the out-of-sample price range
        end_date: last date of the out-of-sample price range
    """
    dates = pd.date_range(start_date, end_date)
    prices = get_data(['IBM'], dates, True)
    prices = prices.drop('SPY', axis=1)

    # calculating volatility
    daily_returns = prices.copy()
    daily_returns[1:] = (prices[1:] / prices[:-1].values) - 1.0
    daily_returns.iloc[0, :] = 0  # no previous row for the first return
    # DataFrame.rolling replaces the removed pd.rolling_std / pd.rolling_mean
    vol = daily_returns.rolling(window=10).std()
    vol1 = vol[9:-5]

    # calculating momentum
    momentum = prices.copy()
    momentum[9:] = (prices[9:] / prices[:-9].values) - 1.0
    momentum1 = momentum[9:-5]

    # calculating bollinger bands value
    sma = prices.rolling(window=10).mean()
    sma1 = sma.dropna()
    std = prices.rolling(window=10).std()
    std1 = std[9:]
    bb_prices = prices[9:]
    bb_value = (bb_prices - sma1) / (2 * std1)
    bb_value1 = bb_value[0:-5]

    # calculating 5 day shifted prices
    shifted_prices = prices.shift(-5)
    prices1 = prices[9:-5]
    future_return = (shifted_prices / prices) - 1.0
    future_return1 = future_return[9:-5]

    data = np.concatenate(
        (vol1.values, momentum1.values, bb_value1.values, future_return1), axis=1)

    # getting predicted Y from knn
    # print("") emits one blank line on both Python 2 and Python 3.
    print("")
    print("KNN Learner Statistics")
    print("")
    predY = knn_learner_test(traindata, data)
    predY_df_return = pd.DataFrame(predY, index=future_return1.index, columns=['predY'])
    predY_df_price = prices1 * (predY_df_return.values + 1)
    future_prices = prices1 * (future_return1.values + 1)

    predY_df_price = predY_df_price.rename(columns={'IBM': 'Predicted Y'})
    future_prices = future_prices.rename(columns={'IBM': 'Training Y'})
    prices1 = prices1.rename(columns={'IBM': 'Price'})

    # The strategy window is hard-coded and overrides the arguments.
    start_date = '2010-01-15'
    end_date = '2010-12-23'
    print("")
    print("IBM Out of Sample Satistics")
    print("")
    my_strategy_IBM(prices1, predY_df_price, start_date, end_date, "IBM_outsample_orders", "IBM Data Out of Sample Entries/Exits", "IBM Data Out of Sample Backtest")
Пример #29
0
def compute_portvals(start_date, end_date, orders_file, start_val):
    """Compute daily portfolio value given a sequence of orders in a CSV file.

    Parameters
    ----------
        start_date: first date to track
        end_date: last date to track
        orders_file: CSV file to read orders from; must provide the columns
            Date, Symbol, Order and Shares
        start_val: total starting cash available

    Returns
    -------
        portvals: portfolio value for each trading day from start_date to end_date (inclusive)
    """
    df_orders = pd.read_csv(orders_file, index_col='Date', parse_dates=True,
                            usecols=['Date', 'Symbol', 'Order', 'Shares'])
    symbols = list(set(df_orders['Symbol']))

    dates = pd.date_range(start_date, end_date)
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[symbols]

    cash = start_val
    holdings = {symbol: 0 for symbol in symbols}
    # Explicit dtype: a Series built from an index alone has no values to
    # infer a numeric dtype from.
    portvals = pd.Series(index=prices.index, dtype=float)

    for date in prices.index:
        if date in df_orders.index:
            # A boolean mask always yields a DataFrame, in file order, with
            # one row per order placed on this date.
            todays_orders = df_orders[df_orders.index == date]
            for symbol, order, shares in zip(todays_orders['Symbol'],
                                             todays_orders['Order'],
                                             todays_orders['Shares']):
                if order == 'SELL':
                    shares = -shares
                holdings[symbol] = holdings.get(symbol, 0) + shares
                # Buying spends cash; selling (negative shares) adds it back.
                cash -= prices.loc[date, symbol] * shares

        stocksval = 0
        for symbol, shares_held in holdings.items():
            stocksval += prices.loc[date, symbol] * shares_held
        portvals.loc[date] = cash + stocksval
    return portvals
Пример #30
0
def test_run():
    """Driver function.

    Runs ``compute_portvals`` on one hard-coded orders file, prints the
    portfolio statistics next to those of a $SPX-only reference portfolio
    and plots both on a normalized scale.
    """
    # Define input parameters
    # Test 1
#    start_date = '2011-01-05'
#    end_date = '2011-01-20'
#    orders_file = os.path.join("orders", "orders-short.csv")
#    start_val = 1000000

    # Test 2
#    start_date = '2011-01-10'
#    end_date = '2011-12-20'
#    orders_file = os.path.join("orders", "orders.csv")
#    start_val = 1000000

    # Test 3
    start_date = '2011-01-14'
    end_date = '2011-12-14'
    # Relative path without the Windows-only ".\" prefix, whose "\o" was also
    # an invalid string escape.
    orders_file = os.path.join("orders", "orders2.csv")
    start_val = 1000000

    # Process orders
    portvals = compute_portvals(start_date, end_date, orders_file, start_val)
    if isinstance(portvals, pd.DataFrame):
        portvals = portvals[portvals.columns[0]]  # if a DataFrame is returned select the first column to get a Series

    # Get portfolio stats
    cum_ret, avg_daily_ret, std_daily_ret, sharpe_ratio = get_portfolio_stats(portvals)

    # Simulate a $SPX-only reference portfolio to get stats
    prices_SPX = get_data(['$SPX'], pd.date_range(start_date, end_date))
    prices_SPX = prices_SPX[['$SPX']]  # remove SPY
    portvals_SPX = get_portfolio_value(prices_SPX, [1.0])
    cum_ret_SPX, avg_daily_ret_SPX, std_daily_ret_SPX, sharpe_ratio_SPX = get_portfolio_stats(portvals_SPX)

    # Compare portfolio against $SPX.
    # print("") emits one blank line on both Python 2 and Python 3.
    print("Data Range: {} to {}".format(start_date, end_date))
    print("")
    print("Sharpe Ratio of Fund: {}".format(sharpe_ratio))
    print("Sharpe Ratio of $SPX: {}".format(sharpe_ratio_SPX))
    print("")
    print("Cumulative Return of Fund: {}".format(cum_ret))
    print("Cumulative Return of $SPX: {}".format(cum_ret_SPX))
    print("")
    print("Standard Deviation of Fund: {}".format(std_daily_ret))
    print("Standard Deviation of $SPX: {}".format(std_daily_ret_SPX))
    print("")
    print("Average Daily Return of Fund: {}".format(avg_daily_ret))
    print("Average Daily Return of $SPX: {}".format(avg_daily_ret_SPX))
    print("")
    # .iloc makes "last row" explicit instead of relying on positional
    # fallback of [] on a date-indexed Series.
    print("Final Portfolio Value: {}".format(portvals.iloc[-1]))

    # Plot computed daily portfolio value
    df_temp = pd.concat([portvals, prices_SPX['$SPX']], keys=['Portfolio', '$SPX'], axis=1)
    plot_normalized_data(df_temp, title="Daily portfolio value and $SPX")
def run_simulations(symbol="IBM",
                    sd_train=dt.datetime(2007, 12, 31),
                    ed_train=dt.datetime(2009, 12, 31),
                    sd_test=dt.datetime(2009, 12, 31),
                    ed_test=dt.datetime(2011, 12, 31),
                    sv=10000,
                    alpha=0.2,
                    rar=0.98,
                    radr=0.99,
                    window=15,
                    num_simulation=10,
                    plot_results=False,
                    verbose=False):
    """Train and test a StrategyLearner several times and average the results.

    Parameters
    ----------
        symbol: symbol to trade
        sd_train, ed_train: training date range
        sd_test, ed_test: testing date range
        sv: starting value, used for the buy-and-hold benchmark and passed
            to the learner
        alpha, rar, radr: forwarded to ``sl.StrategyLearner``
        window: accepted for compatibility; not used by this function
        num_simulation: number of independent train/test repetitions
        plot_results: plot each training cumulative-return curve
        verbose: print intermediate results

    Returns
    -------
        (avg_cumulative_returns_train, avg_cumulative_returns_test)
    """
    syms = [symbol]

    # read in training data
    train_dates = pd.date_range(sd_train, ed_train)
    train_prices = ut.get_data(syms, train_dates)[syms]

    # read in testing data
    dates = pd.date_range(sd_test, ed_test)
    prices = ut.get_data(syms, dates)[syms]

    def buy_and_hold_return(frame):
        # Return of holding 100 units from the first to the last row.
        price_change = frame.iloc[-1, 0] - frame.iloc[0, 0]
        return (price_change * 100 + sv) / sv - 1

    # compute benchmark
    train_cumulative_return_buy_hold_strategy = buy_and_hold_return(train_prices)
    if verbose:
        print("cumulative return of buy-and-hold strategy on training: {}".format(
            train_cumulative_return_buy_hold_strategy))

    cumulative_return_buy_hold_strategy = buy_and_hold_return(prices)
    if verbose:
        print("cumulative return of buy-and-hold strategy on testing: {}".format(
            cumulative_return_buy_hold_strategy))

    cumulative_returns_train = np.zeros(num_simulation)
    cumulative_returns_test = np.zeros(num_simulation)

    for i in range(num_simulation):
        # instantiate the strategy learner
        learner = sl.StrategyLearner(alpha=alpha,
                                     rar=rar,
                                     radr=radr,
                                     verbose=False)

        # learning; sv is forwarded instead of a hard-coded 10000 so the
        # caller's starting value is honoured
        cumulative_return = learner.addEvidence(symbol=symbol,
                                                sd=sd_train,
                                                ed=ed_train, sv=sv)

        if plot_results:
            plt.plot(cumulative_return)

        # save the final result
        cumulative_returns_train[i] = cumulative_return[-1]
        if verbose:
            print("cumulative_return of training: {}".format(cumulative_returns_train[i]))

        # test the learner
        df_trades, cumulative_returns_test[i] = learner.testPolicy(
            symbol=symbol, sd=sd_test, ed=ed_test, sv=sv)

        # NOTE(review): the figure is shown once per simulation although the
        # title speaks of all simulations - confirm whether this belongs
        # after the loop.
        if plot_results:
            plt.title("Cumulative return on training set of ten simulations")
            plt.ylabel("Cumulative return")
            plt.xlabel("Trials")
            plt.show()

    if verbose:
        print("cumulative_returns_train {}".format(cumulative_returns_train))
        print("cumulative_returns_test {}".format(cumulative_returns_test))

    avg_cumulative_returns_train = np.mean(cumulative_returns_train)
    avg_cumulative_returns_test = np.mean(cumulative_returns_test)

    return (avg_cumulative_returns_train, avg_cumulative_returns_test)
Пример #32
0
            max_votes = 0
            max_votes_class = -1
            for v, count in votes.items():  # we loop through the votes
                if count > max_votes:  # if this vote is grater than our max_votes we make this vote our max_votes and we store the count of that vote
                    max_votes = count
                    max_votes_class = v
                y[i] = max_votes_class  # we set yi to the corresponding class
        return y

    def score(self, X, Y):
        """Return the fraction of predictions for ``X`` that equal ``Y``."""
        predictions = self.predict(X)
        is_correct = predictions == Y
        return np.mean(is_correct)


if __name__ == '__main__':
    X, y = get_data(2000)
    #X,y = get_xor()
    #X,y = get_donut()
    Ntrain = 1000
    X_train, y_train = X[:Ntrain], y[:Ntrain]
    X_test, y_test = X[Ntrain:], y[Ntrain:]
    for k in (1, 2, 3, 4, 5):
        knn = KNN(k)
        print(f"\nThis is for K = {k} \n\n")
        t0 = datetime.now()
        knn.fit(X_train, y_train)
        print(f'Training time: {datetime.now()-t0}')
        knn.predict(X_test)
        t0 = datetime.now()
        print(f"Train accuracy: {knn.score(X_train,y_train)}")
        print(
Пример #33
0
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000):
        """Train ``self.learner`` on the price history of ``symbol``.

        Six indicator series (three from ``ind.indicators`` with window 70,
        three with window 14) are discretized and concatenated into one
        integer per row. The learner is then driven over the rows repeatedly
        until two consecutive passes produce the same total reward.

        :param symbol: column to learn on.
        :param sd: start of the training date range.
        :param ed: end of the training date range.
        :param sv: accepted for interface compatibility; not used here.
        """
        lookback = 70  # leading rows skipped in every indicator and in prices

        # compute the technical indicators
        sym = [symbol]
        momentum3, sma_ratio3, bbp3 = ind.indicators(sd, ed, sym, 70, False)
        momentum14, sma_ratio14, bbp14 = ind.indicators(sd, ed, sym, 14, False)
        indicator_frames = (momentum3, sma_ratio3, bbp3,
                            momentum14, sma_ratio14, bbp14)

        # create feature array: columns 0-5 hold the indicators, column 6 the
        # combined discrete state
        row = momentum3.shape[0] - lookback
        features = np.zeros((row, 7))
        for col, frame in enumerate(indicator_frames):
            features[:, col] = frame.iloc[lookback:][symbol].values

        # discretize each indicator against 9 evenly spaced edges spanning
        # its own min..max
        fmin = features.min(axis=0)
        fmax = features.max(axis=0)
        for i in range(6):
            bins = np.linspace(fmin[i], fmax[i], 9)
            features[:, i] = np.digitize(features[:, i], bins)

        # concatenate the six bin numbers into a single integer per row
        for i in range(row):
            features[i, 6] = int("".join(str(int(v)) for v in features[i, :6]))

        # Read in the SPY & symbol data (adj_close) using util.py
        dates = pd.date_range(sd, ed)
        prices_all = get_data(sym, dates)  # automatically adds SPY
        price = prices_all / prices_all.iloc[0]  # normalize by the first row
        price = price.iloc[lookback:][symbol].values

        def position(action):
            # Action 1 is long 1000, action 2 is short 1000, anything else flat.
            if action == 1:
                return 1000
            if action == 2:
                return -1000
            return 0

        def state_for(day, pre_action):
            # Learner state: the row's combined feature digits followed by the
            # previous action.
            return int(str(int(features[day, 6])) + str(int(pre_action)))

        def run_epoch():
            # One pass over all rows; returns the summed reward of the pass.
            pre_action = 0  # track the previous action
            cur_action = self.learner.querysetstate(state_for(0, pre_action))
            total_reward = 0
            for i in range(1, row):
                cur_state = state_for(i, pre_action)
                held = position(cur_action)
                traded = abs(held - position(pre_action))
                # Gain of the held position over the step, minus the impact
                # cost on the units traded to get into it.
                cur_reward = (held * (price[i] - price[i - 1])
                              - price[i - 1] * traded * self.impact)
                total_reward = total_reward + cur_reward
                action = self.learner.query(cur_state, cur_reward)
                pre_action = cur_action
                cur_action = action
            return total_reward

        # update the qlearner until converge
        # NOTE(review): there is no cap on the number of passes; confirm the
        # learner is guaranteed to reach a repeated total reward.
        last_reward = 0
        total_reward = run_epoch()
        while total_reward != last_reward:
            last_reward = total_reward
            total_reward = run_epoch()
Пример #34
0
        P = self.predict(X)
        return np.mean(P == Y)

    def predict(self, X):
        """Return, for every row of ``X``, the class with the highest score.

        The score of class ``c`` is the Gaussian log-density of the row under
        ``self.gaussians[c]`` plus the log of ``self.priors[c]``. Class
        labels are used directly as column indices, so they must be the
        integers ``0 .. K-1``.
        """
        N, D = X.shape
        K = len(self.gaussians)
        P = np.zeros((N, K))
        # dict.items() works on both Python 2 and 3; iteritems() does not
        # exist on Python 3.
        for c, g in self.gaussians.items():
            mean, var = g['mean'], g['var']
            P[:, c] = mvn.logpdf(X, mean=mean, cov=var) + np.log(self.priors[c])
        return np.argmax(P, axis=1)


if __name__ == '__main__':
    # Fit NaiveBayes on the first half of the data and report timing and
    # accuracy on both halves.
    X, Y = get_data(10000)
    # Floor division: a plain "/" yields a float on Python 3, which is not a
    # valid slice index.
    Ntrain = len(Y) // 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
    Xtest, Ytest = X[Ntrain:], Y[Ntrain:]

    model = NaiveBayes()
    t0 = datetime.now()
    model.fit(Xtrain, Ytrain)
    print("Training time: {}".format(datetime.now() - t0))

    t0 = datetime.now()
    print("Train accuracy: {}".format(model.score(Xtrain, Ytrain)))
    print("Time to compute train accuracy: {} Train size: {}".format(
        datetime.now() - t0, len(Ytrain)))

    t0 = datetime.now()
    print("Test accuracy: {}".format(model.score(Xtest, Ytest)))
Пример #35
0
def compute_portvals(order, start_val=100000, commission=0, impact=0):
    """Compute the daily portfolio value produced by a frame of JPM trades.

    Parameters
    ----------
        order: DataFrame indexed by date; its first column holds the signed
            number of units traded on that date (positive buys, negative
            sells). Every index value must be present in the price data.
        start_val: starting cash
        commission: fixed cash amount deducted for every non-zero trade
        impact: fraction of the traded value deducted as market impact

    Returns
    -------
        portvals: Series with the total portfolio value for every row of the
        price data between the first and the last order date
    """
    order = order.sort_index()
    # NOTE(review): the symbol is hard-coded; confirm callers only trade JPM.
    sym = "JPM"
    date_range = pd.date_range(order.index[0], order.index[-1])

    prices = get_data([sym], date_range)
    prices['Cash'] = 1.00  # one unit of cash is always worth 1

    # Per-day changes in holdings, one column per price column.
    trade = pd.DataFrame(0.0, index=prices.index, columns=prices.columns)
    # Single-step positional write; the chained form trade['Cash'].iloc[0]
    # writes to a temporary under pandas copy-on-write.
    trade.iloc[0, trade.columns.get_loc('Cash')] = start_val

    for idx, row in order.iterrows():
        order_units = row.iloc[0]
        if order_units == 0:
            # Nothing is traded on this row, so no commission is due.
            continue
        order_price = prices.loc[idx, sym]
        trade.loc[idx, sym] += order_units
        trade.loc[idx, "Cash"] -= order_units * order_price
        trade.loc[idx, "Cash"] -= commission
        trade.loc[idx, "Cash"] -= abs(order_units) * order_price * impact

    # Running totals turn per-day changes into per-day holdings.
    holdings = trade.cumsum()

    # Value of every holding per day, summed across columns.
    portvals = (prices * holdings).sum(axis=1)
    return portvals
    ax2 = ax1.twinx()
    normed = prices / prices.ix[0]
    normed_spy = spy_prices / spy_prices.ix[0]
    normed.plot(ax=ax1, color='orange', lw=1.2, legend=False)
    normed_spy.plot(ax=ax1, color='green', lw=1.2, legend=False)
    ratio.plot(ax=ax2, color='blue', lw=1.2)
    ax1.set_ylabel('Normalized Price')
    ax2.set_ylabel('Ratio of SPY to JPM')
    ax1.set_xlabel('Date')
    plt.grid(True)
    or_patch = mpatches.Patch(color='orange', label='JPM')
    green_patch = mpatches.Patch(color='green', label='SPY')
    blue_patch = mpatches.Patch(color='blue', label='Ratio of SPY to JPM')
    plt.legend(handles=[or_patch, green_patch, blue_patch], loc='lower left')
    plt.title('SPY-to-JPM Normalized Ratio Indicator')
    #plt.show()
    plt.savefig('spy_jpm_ratio.pdf')


if __name__ == "__main__":
    # Load JPM and SPY prices for 2008-2009 and print the joined SMA frame.
    start = '01-01-2008'
    end = '12-31-2009'
    dates = pd.date_range(start, end)
    prices = get_data(['JPM'], dates).drop(['SPY'], axis=1)
    spy = get_data(['SPY'], dates, addSPY=False)
    sma = get_sma(prices, 50)[1]
    # The whole expression goes inside print(); previously the call closed
    # early and .join() was applied to print's None result on Python 3.
    # .values replaces the removed DataFrame.as_matrix().
    print(sma.join(sma, lsuffix='_sma', rsuffix='_sma').join(sma).values)
    #sma_plot = plot_sma(prices, 50)
    #bb_plot = plot_bb(prices, 50)
    #ratio_plot = plot_spy_ratio(prices, spy)
Пример #37
0
    df[['MACD', 'MACD_SIGNAL']].plot(kind='bar', ax=ax)

    # ax2 = ax.twinx()
    # ax2.plot(ax.get_xticks(), df[['MACD', 'MACD_SIGNAL']].values)

    plt.grid()
    plt.savefig(filename + '.png')


if __name__ == "__main__":
    # Load JPM prices for 2008-2009 and compute every indicator once.
    syms = ['JPM']
    sd = dt.datetime(2008, 1, 1)
    ed = dt.datetime(2009, 12, 31)
    dates = pd.date_range(sd, ed + dt.timedelta(days=1))
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    # Fill gaps forward first, then backward for any leading gaps.
    # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
    prices = prices.ffill()
    prices = prices.bfill()

    priceOverSMAValues = priceOverSMA(prices)
    # NOTE: from here on this name refers to the result, not the function.
    priceOverEMAClubbed = priceOverEMAClubbed(prices)
    bband, bbp = bbands(prices)
    macdVal = macd(prices)
    ema_fast = ema(prices, window=12)
    ema_fast.rename(columns={'JPM': 'EMA Fast (12)'}, inplace=True)
    ema_slow = ema(prices, window=26)
    ema_slow.rename(columns={'JPM': 'EMA Slow(26)'}, inplace=True)
    # NOTE: this rebinds ema from the function to the combined frame, so
    # ema() must not be called after this line.
    ema = pd.concat([prices, ema_fast, ema_slow], axis=1)
    rsiV = rsi(prices)
Пример #38
0
def experiment_2_helper(symbol,
                        start_train,
                        end_train,
                        start_test,
                        end_test,
                        sv=100000,
                        commission=0.0,
                        impact=0.0):
    """Compare manual, benchmark and learned strategies on one symbol.

    Trains a ``StrategyLearner`` on the training range, then evaluates the
    manual strategy, a buy-and-hold benchmark and the learner on the test
    range. Each portfolio is normalized by its first row before the metrics
    are computed.

    Parameters
    ----------
        symbol: symbol to trade
        start_train, end_train: training date range
        start_test, end_test: evaluation date range
        sv: starting portfolio value
        commission: forwarded to ``ms.compute_portvals``
        impact: forwarded to the learner and to ``ms.compute_portvals``

    Returns
    -------
        Twelve values: the four results of ``id.calculate_portfolio_metrics``
        for the manual strategy, then for the benchmark, then for the
        strategy learner.
    """
    # Fixed seeds so repeated runs produce the same learner.
    np.random.seed(1000)
    random.seed(1000)

    slearner = sl.StrategyLearner(impact=impact)
    slearner.addEvidence(symbol=symbol, sd=start_train, ed=end_train, sv=sv)

    # Calculate manual strategy portfolio values
    manual_trades = man.testPolicy(symbol=symbol,
                                   sd=start_test,
                                   ed=end_test,
                                   sv=sv)
    manual_port_vals = ms.compute_portvals(manual_trades,
                                           start_val=sv,
                                           commission=commission,
                                           impact=impact)
    manual_port_vals = manual_port_vals / manual_port_vals.iloc[0, :]
    manual_port_vals.rename(columns={"Portfolio Value": "Manual"},
                            inplace=True)

    # Calculate benchmark portfolio values
    dates = pd.date_range(start_test, end_test)
    syms = [symbol]
    price_range = get_data(syms, dates)  # automatically adds SPY
    # One row per index label: buy 1000 on the first day, and a zero-share
    # order on the last day so the order dates span the whole range. A single
    # data row for two index labels raises ValueError.
    benchmark_trades = pd.DataFrame(
        data=[[symbol, "BUY", 1000], [symbol, "BUY", 0]],
        index=[price_range.index[0], price_range.index[-1]],
        columns=["Symbol", "Order", "Shares"])
    bench_port_vals = ms.compute_portvals(benchmark_trades,
                                          start_val=sv,
                                          commission=commission,
                                          impact=impact)
    bench_port_vals = bench_port_vals / bench_port_vals.iloc[0, :]
    bench_port_vals.rename(columns={"Portfolio Value": "Benchmark"},
                           inplace=True)

    # Calculate strategy learner portfolio values
    temp_strategylearner_trades = slearner.testPolicy(symbol=symbol,
                                                      sd=start_test,
                                                      ed=end_test,
                                                      sv=sv)
    # Convert signed share counts into BUY/SELL orders, skipping empty days.
    # Rows are collected first and the frame is built once, because
    # DataFrame.append no longer exists in pandas 2.
    order_dates = []
    order_rows = []
    for row_idx, nshares in zip(temp_strategylearner_trades.index,
                                temp_strategylearner_trades.iloc[:, 0]):
        if nshares == 0:
            continue
        side = 'SELL' if nshares < 0 else 'BUY'
        order_dates.append(row_idx)
        order_rows.append([side, symbol, abs(nshares)])
    strategylearner_trades = pd.DataFrame(
        order_rows,
        columns=['Order', 'Symbol', 'Shares'],
        index=order_dates)

    strategylearner_port_vals = ms.compute_portvals(strategylearner_trades,
                                                    start_val=sv,
                                                    commission=commission,
                                                    impact=impact)
    strategylearner_port_vals = (strategylearner_port_vals /
                                 strategylearner_port_vals.iloc[0, :])
    strategylearner_port_vals.rename(columns={"Portfolio Value": "Strategy"},
                                     inplace=True)

    mcr, madr, msddr, msr = id.calculate_portfolio_metrics(manual_port_vals)
    bcr, badr, bsddr, bsr = id.calculate_portfolio_metrics(bench_port_vals)
    scr, sadr, ssddr, ssr = id.calculate_portfolio_metrics(
        strategylearner_port_vals)
    return mcr, madr, msddr, msr, bcr, badr, bsddr, bsr, scr, sadr, ssddr, ssr
Пример #39
0
def test_run():
    """Assess a fixed four-stock portfolio and print its summary statistics.

    Prices for the hard-coded symbols between 2009-01-01 and 2011-01-01 are
    loaded through ``util.get_data``, normalized to their first row and
    weighted by the fixed allocations.  The function then prints the
    annualized Sharpe ratio, the volatility, the average per-sample return
    and the cumulative return of the resulting portfolio.  Nothing is
    returned.
    """
    # Set up
    start_date = dt.datetime(2009, 1, 1)
    end_date = dt.datetime(2011, 1, 1)
    symbols = ['GOOG', 'AAPL', 'GLD', 'XOM']
    allocations = [0.2, 0.3, 0.4, 0.1]
    start_val = 1000000
    risk_free_rate = 0.0  # yearly rate
    sample_freq = 252  # samples per year: 252 daily, 52 weekly, 12 monthly

    dates = pd.date_range(start_date, end_date)
    prices_all = util.get_data(symbols, dates)

    prices = prices_all[symbols]  # only portfolio symbols
    prices_SPY = prices_all["SPY"]  # only SPY, for comparison later

    # Normalize everything to the first available row.  prices_SPY is a
    # Series (one column was selected), so it takes a single positional
    # indexer.
    prices_SPY = prices_SPY / prices_SPY.iloc[0]
    normed_prices = prices / prices.iloc[0, :]

    # Daily portfolio value per unit of starting capital.
    alloced = normed_prices * allocations
    port_vals = alloced.sum(axis=1)
    daily_returns = compute_daily_returns(port_vals)

    # Portfolio statistics
    cum_ret = (port_vals.iloc[-1] / port_vals.iloc[0]) - 1
    avg_daily_ret = daily_returns.mean()
    std_daily_ret = daily_returns.std()

    # Sharpe ratio: the yearly risk-free rate is converted to a per-sample
    # rate by compounding, and the per-sample ratio is annualized with
    # K = sqrt(samples per year), i.e. sqrt(252) for daily, sqrt(52) for
    # weekly, sqrt(12) for monthly and 1 for yearly sampling.
    rfr_freq_calc = ((1.0 + float(risk_free_rate))**(1. / sample_freq)) - 1
    SR = (float(avg_daily_ret) - float(rfr_freq_calc)) / float(std_daily_ret)
    K = np.sqrt(sample_freq)
    SRannualized = K * SR

    end_value = start_val * (cum_ret + 1)

    # Compare daily portfolio value with SPY using a normalized plot
    df_temp = pd.concat([port_vals, prices_SPY],
                        keys=['Portfolio', 'SPY'],
                        axis=1)
    # util.plot_data(df_temp, ylabel="Normalized Price")
    # plt.show()

    # Print statistics.  Each line is formatted into one string so the output
    # is the same whether print is a statement or a function.
    print("Start Date: {}".format(start_date))
    print("End Date: {}".format(end_date))
    print("Symbols: {}".format(symbols))
    print("Allocations: {}".format(allocations))
    print("Sharpe Ratio: {}".format(SRannualized))
    print("Volatility (stdev of daily returns): {}".format(std_daily_ret))
    print("Average Daily Return: {}".format(avg_daily_ret))
    print("Cumulative Return: {}".format(cum_ret))
Пример #40
0
 def getPrices(self, startDate, endDate, symbolList):
     """Load prices for ``symbolList`` and cache raw and normalized copies.

     Sets ``self.prices`` (the columns named in ``symbolList``),
     ``self.normalizedPrices`` (every column divided by its first row) and
     resets ``self.prevTransaction`` to 0.0.
     """
     dateRange = pd.date_range(startDate, endDate)
     prices = util.get_data(symbolList, dateRange)
     self.prices = prices[symbolList]
     # .iloc[0] selects the first row by position; the .ix indexer used
     # previously has been removed from pandas.
     self.normalizedPrices = self.prices / self.prices.iloc[0]
     self.prevTransaction = 0.0
Пример #41
0
def _build_densenet(softmax):
    """Create and compile the DenseNet classifier; weights are not loaded here.

    :param softmax: forwarded as the second positional argument of
        ``densenet.create_dense_net``.  Callers pass ``False`` for the C&W
        attacks, which need the model without its softmax layer.
    :return: the compiled model.
    """
    model = densenet.create_dense_net(10,
                                      softmax, (32, 32, 3),
                                      40,
                                      3,
                                      12,
                                      16,
                                      dropout_rate=0)
    # Using Adam instead of SGD to speed up training
    optimizer = Adam(lr=1e-4)
    model.compile(loss='categorical_crossentropy',
                  optimizer=optimizer,
                  metrics=["accuracy"])
    return model


def main(args):
    """Evaluate a pre-trained model and craft adversarial samples against it.

    :param args: parsed command-line namespace; ``dataset``, ``attack``,
        ``model`` and ``batch_size`` are read.
    :raises AssertionError: if the dataset or attack name is unknown, the
        model file is missing, or svhn + cw-l2 is run with a batch size
        other than 16.
    :raises ValueError: if a C&W attack is requested for a configuration for
        which no softmax-free model is built.
    """
    assert args.dataset in ['mnist', 'cifar', 'svhn'], \
        "Dataset parameter must be either 'mnist', 'cifar' or 'svhn'"
    assert args.attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2', 'all', 'cw-lid'], \
        "Attack parameter must be either 'fgsm', 'bim-a', 'bim-b', " \
        "'jsma', 'cw-l2', 'all' or 'cw-lid' for attacking LID detector"
    #model_file = os.path.join(PATH_DATA, "model_%s.h5" % args.dataset)
    model_file = "../model/densenet_cifar10.h5df"
    print(model_file)
    assert os.path.isfile(model_file), \
        'model file not found... must first train model using train_model.py.'
    if args.dataset == 'svhn' and args.attack == 'cw-l2':
        assert args.batch_size == 16, \
        "svhn has 26032 test images, the batch_size for cw-l2 attack should be 16, " \
        "otherwise, there will be error at the last batch-- needs to be fixed."

    print('Dataset: %s. Attack: %s' % (args.dataset, args.attack))
    # Create TF session, set it as Keras backend.  One global initializer is
    # enough: tf.initialize_all_variables() is only a deprecated alias of it.
    init_op = tf.global_variables_initializer()
    sess = tf.Session()
    try:
        sess.run(init_op)
        tf.keras.backend.set_session(sess)

        if args.attack == 'cw-l2' or args.attack == 'cw-lid':
            warnings.warn("Important: remove the softmax layer for cw attacks!")
            # use softmax=False to load without softmax layer
            model = None
            # These checks are deliberately independent (not elif): a
            # dataset-specific model replaces the dense one when both match.
            if args.model == 'dense':
                model = _build_densenet(False)
            if args.dataset == 'mnist':
                model = get_model(args.dataset, softmax=False)
                model.compile(loss=cross_entropy,
                              optimizer='adadelta',
                              metrics=['accuracy'])
            if args.dataset == 'svhn':
                model = RCNN.get_model(False)
            if model is None:
                # Previously this surfaced as an unbound-local error on the
                # load_weights call below.
                raise ValueError(
                    "no softmax-free model is available for model=%r, "
                    "dataset=%r" % (args.model, args.dataset))
            model.load_weights(model_file)
        else:
            if args.model == 'dense':
                model = _build_densenet(True)
                model.load_weights(model_file)
            elif args.dataset == 'svhn':
                model = RCNN.get_model(True)
                model.load_weights(model_file)
            else:
                model = load_model(model_file)

        _, _, X_test, Y_test = get_data(args.dataset)
        score = model.evaluate(X_test,
                               Y_test,
                               batch_size=args.batch_size,
                               verbose=0)
        print("Accuracy on the test set: %0.2f%%" % (100 * score[1]))

        if args.attack == 'cw-lid':  # white box attacking LID detector - an example
            X_test = X_test[:1000]
            Y_test = Y_test[:1000]

        if args.attack == 'all':
            # Cycle through all attacks
            for attack in ['fgsm', 'bim-a', 'bim-b', 'jsma', 'cw-l2']:
                craft_one_type(sess, model, X_test, Y_test, args.dataset,
                               attack, args.batch_size)
        else:
            # Craft one specific attack type
            craft_one_type(sess, model, X_test, Y_test, args.dataset,
                           args.attack, args.batch_size)
        print('Adversarial samples crafted and saved to %s ' % PATH_DATA)
        _, acc = model.evaluate(X_test,
                                Y_test,
                                batch_size=args.batch_size,
                                verbose=0)
        print("After crafting, Accuracy on the test set: %0.2f%%" % (100 * acc))
    finally:
        # Release the session even when model loading or crafting fails.
        sess.close()
Пример #42
0
    nnodes = [256, 128, 64]
    nmessage = 3

    # make the model
    mus = np.linspace(0.8, 5.0, 43)
    etas = np.array([-100.0] * 43)
    model = util.get_model(mus, etas, pad_dim, nelem, nembed, nnodes, nmessage)

    model.compile(optimizer=tf.keras.optimizers.Adam(0.001),
                  loss=['mse', util.mse_mp, util.mse_mp, util.mse_mp],
                  loss_weights=[1.0, 1.0, 1.0, 1.0],
                  metrics=[util.mae_mp])
    print(model.summary())

    # load data
    RT, ZT, yT = util.get_data(f'data/{args.dataset_train}.pkl', pad_dim)
    RV, ZV, yV = util.get_data(f'data/{args.dataset_val}.pkl', pad_dim)

    #RT, ZT, yT = RT[:800], ZT[:800], yT[:800]
    #RV, ZV, yV = RV[:800], ZV[:800], yV[:800]

    # monopole
    yV_ = yV[:, :, 0]

    # dipole (mu_x, mu_y, mu_z)
    yV_i_ = yV[:, :, 1:4]

    # quadrupole diagonal (Q_xx, Q_yy, Q_zz)
    yV_ii_ = yV[:, :, [4, 7, 9]]

    # quadrupole off-diagonal (Q_xy, Q_xz, Q_yz)
Пример #43
0
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
from munkres import Munkres, print_matrix

from sklearn import metrics

from sklearn.metrics import cluster
from sklearn.cluster import KMeans

from util import get_data
from util import prepare_data
from util import prepare_data

# Load the dataset and split its columns by dtype.
df = get_data()

cat_df_list = list(df.select_dtypes(include=['object']))
num_df_list = list(df.select_dtypes(include=['float64', 'int64']))

# Accumulators for the clustering metrics (filled later in the script).
km_scores = []
inertia = []
km_silhouette = []
vmeasure_score = []
db_score = []

y = df["readmitted"]
X = df[num_df_list]
# NOTE(review): if "readmitted" is stored as a numeric column it is still
# part of X at this point -- confirm before re-enabling the drop below.
#X.drop("readmitted", inplace=True, axis=1)

# Fit the scaler that is kept in `scaler`, so the fitted instance stays
# available for later transform / inverse_transform calls (a second,
# throwaway StandardScaler used to do the fitting instead).
scaler = StandardScaler()
X = scaler.fit_transform(X)
Пример #44
0
        self.max_depth = max_depth

    def fit(self, X, Y):
        """Build a fresh root ``TreeNode`` limited to ``self.max_depth`` and fit it on (X, Y)."""
        root = TreeNode(max_depth=self.max_depth)
        # Publish the node before fitting, so self.root is set even if the
        # fit call raises.
        self.root = root
        root.fit(X, Y)

    def predict(self, X):
        """Return whatever the fitted root node predicts for ``X``."""
        root = self.root
        return root.predict(X)

    def score(self, X, Y):
        """Return the mean of the element-wise comparison between predictions for ``X`` and ``Y``."""
        predictions = self.predict(X)
        matches = predictions == Y
        return np.mean(matches)


if __name__ == '__main__':
    X, Y = get_data()

    # try donut and xor
    # from sklearn.utils import shuffle
    # X, Y = get_xor()
    # # X, Y = get_donut()
    # X, Y = shuffle(X, Y)

    # only take 0s and 1s since we're doing binary classification
    idx = np.logical_or(Y == 0, Y == 1)
    X = X[idx]
    Y = Y[idx]

    # split the data
    # Floor division keeps Ntrain an int; with true division the slice
    # bounds below would be floats, which cannot be used as indices.
    Ntrain = len(Y) // 2
    Xtrain, Ytrain = X[:Ntrain], Y[:Ntrain]
Пример #45
0
def optimize_portfolio(
    sd=dt.datetime(2008, 1, 1),
    ed=dt.datetime(2009, 1, 1),
    syms=["GOOG", "AAPL", "GLD", "XOM"],
    gen_plot=False,
):
    """
    Find the optimal allocations for a given set of stocks, optimizing for
    maximum Sharpe Ratio.

    The function accepts a list of symbols as well as start and end dates and
    finds the allocations to each of the equities with a constrained SLSQP
    search: every allocation is bounded to [0, 1] and the allocations must
    sum to 1.

    :param sd: A datetime object that represents the start date, defaults to 1/1/2008
    :type sd: datetime
    :param ed: A datetime object that represents the end date, defaults to 1/1/2009
    :type ed: datetime
    :param syms: A list of symbols that make up the portfolio (note that your code should support any
        symbol in the data directory).  The default list is never mutated here.
    :type syms: list
    :param gen_plot: If True, optionally create a plot named plot.png. The autograder will always call your
        code with gen_plot = False.
    :type gen_plot: bool
    :return: A tuple containing the portfolio allocations (a one-dimensional numpy array), cumulative
        return, average daily returns, standard deviation of daily returns, and Sharpe ratio
    :rtype: tuple
    """

    # Read in adjusted closing prices for given symbols, date range
    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY
    prices = prices_all[syms]  # only portfolio symbols
    prices_SPY = prices_all["SPY"]  # only SPY, for comparison later

    # Start the search from an equal-weight portfolio.  1.0 / n keeps the
    # initial guess non-zero even under integer division.
    n = len(syms)
    allocs = [1.0 / n] * n
    # NOTE(review): `f` is the objective defined elsewhere in this file;
    # presumably it returns the negative Sharpe ratio -- confirm.
    result = spo.minimize(f,
                          allocs,
                          args=(prices, ),
                          method='SLSQP',
                          bounds=[(0, 1)] * n,
                          constraints={
                              'type': 'eq',
                              'fun': lambda x: 1.0 - np.sum(x)
                          })
    optimum_allocs = result.x

    # Statistics and daily value of the optimized portfolio
    cr, adr, sddr, sr = assess_portfolio(optimum_allocs, prices)
    port_val = get_port_val(optimum_allocs, prices)

    # Compare daily portfolio value with SPY using a normalized plot
    if gen_plot:
        df_temp = pd.concat([port_val, prices_SPY],
                            keys=["Portfolio", "SPY"],
                            axis=1)
        # Rebase both series to their first row.
        df_temp = np.divide(df_temp, df_temp.iloc[0].values)
        plt.figure(1)
        ax = df_temp.plot(title='Portfolio Value ' + str(syms) + ' and SPY')
        ax.set_ylabel('Normalized Prices')
        ax.set_xlabel('Dates')
        plt.savefig('plot.png')

    return optimum_allocs, cr, adr, sddr, sr
Пример #46
0
    def _get_data(symbol, dates, column):
        """Fetch ``column`` for one symbol over ``dates`` with gaps filled.

        Missing values are forward-filled first, then back-filled so that
        leading gaps are covered as well.
        """
        data = get_data([symbol], dates, colname=column)
        # ffill()/bfill() replace the deprecated fillna(method=...) spelling.
        data.ffill(inplace=True)
        data.bfill(inplace=True)

        return data
def _format_axes(ax, xlabel, ylabel, title, x_start, x_end):
    """Apply labels, title, x-limits and the shared 14pt font size to ``ax``."""
    ax.set(xlabel=xlabel, ylabel=ylabel, title=title)

    ax.set_xlim(x_start, x_end)
    ax.title.set_fontsize(14)
    ax.xaxis.label.set_fontsize(14)
    ax.yaxis.label.set_fontsize(14)


def _show_figure(fig, ax):
    """Draw the legend, auto-format the date axis and display the figure."""
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles, labels)

    fig.autofmt_xdate()
    plt.show()


def test_code():
    """Plot the JPM technical indicators for 2008-2009, one figure per indicator.

    Six figures are shown in sequence: SMA, Bollinger Bands, Bollinger Band
    value, momentum, volatility and RSI.  Nothing is returned.
    """
    startDate = dt.datetime(2008, 1, 1)
    endDate = dt.datetime(2009, 12, 31)
    dateRange = pd.date_range(startDate, endDate)
    symbol = 'JPM'
    period_label = "Jan. 1, 2008 - Dec. 31, 2009"

    prices = get_data([symbol], dateRange)
    indicators = get_indicators(prices, symbol)

    # Graph for SMA
    sma = indicators[['price', 'SMA', 'price_SMA']]

    fig, ax = plt.subplots()
    ax.plot(sma['price'], label="Price")
    ax.plot(sma['SMA'], label="20-Day SMA", linewidth=2)
    ax.plot(sma['price_SMA'], label="Price/SMA", linewidth=0.85)

    _format_axes(ax, period_label, "Value (Normalized)",
                 "20-Day Simple Moving Average for JPM", startDate, endDate)
    _show_figure(fig, ax)

    # Graph for Bollinger Bands
    bands = indicators[['price', 'upper band', 'lower band']]

    fig, ax = plt.subplots()
    ax.plot(bands['price'], label="Price")
    ax.plot(bands['upper band'],
            label="Upper Band",
            linewidth=0.85,
            linestyle='dashed')
    ax.plot(bands['lower band'],
            label="Lower Band",
            linewidth=0.85,
            linestyle='dashed')

    _format_axes(ax, period_label, "Value (Normalized)",
                 "Bollinger Bands for JPM Based on 20-Day Moving Average",
                 startDate, endDate)
    _show_figure(fig, ax)

    # Graph for Bollinger Band Value (the price line is intentionally not drawn)
    bb_value = indicators[['price', 'bb value']]

    fig, ax = plt.subplots()
    ax.plot(bb_value['bb value'], label="Bollinger Band Value")

    _format_axes(ax, period_label, "Value (Normalized)",
                 "Bollinger Band Value for JPM Based on 20-Day Moving Average",
                 startDate, endDate)

    # Reference lines at 0 and +/-1.
    ax.axhline(y=0, linewidth=0.85, linestyle='dashed', color='0.5')
    ax.axhline(y=1, linewidth=0.75, linestyle='dashed', color='0.5')
    ax.axhline(y=-1, linewidth=0.75, linestyle='dashed', color='0.5')

    _show_figure(fig, ax)

    # Graph for Momentum
    momentum = indicators[['price', 'momentum']]

    fig, ax = plt.subplots()
    ax.plot(momentum['price'], label="Price")
    ax.plot(momentum['momentum'], label="momentum")

    _format_axes(ax, period_label, "Stock Price (Normalized)",
                 "Momentum for JPM Over a 20-Day Period", startDate, endDate)
    _show_figure(fig, ax)

    # Graph for Volatility
    vol = indicators[['price', 'volatility']]

    fig, ax = plt.subplots()
    ax.plot(vol['price'], label="Price")
    ax.plot(vol['volatility'], label="Volatility")

    _format_axes(ax, period_label, "Value (Normalized)",
                 "Volatility for JPM Based on 20-Day Moving Average",
                 startDate, endDate)

    y = np.arange(0, 1.3, 0.1)
    plt.yticks(y)
    plt.grid()

    _show_figure(fig, ax)

    # Graph for RSI w/ 20 day rolling mean
    rsi = indicators[['price', 'RSI_SMA', 'RSI_EMWA', 'prices_unnormed']]

    fig, ax = plt.subplots()
    ax.plot(rsi['prices_unnormed'], label="Price")
    ax.plot(rsi['RSI_SMA'], label="RSI Simple Moving Average", linewidth=0.85)
    ax.plot(rsi['RSI_EMWA'],
            label="RSI Exponential Moving Average",
            linewidth=0.85)

    # This figure starts its x-axis on Jan. 30, 2008 rather than startDate.
    _format_axes(
        ax, period_label, "Value",
        "RSI for JPM Based on Exponential Moving Average and \n 20-Day Simple Moving Average",
        dt.datetime(2008, 1, 30), endDate)

    y = np.arange(0, 90, 10)
    plt.yticks(y)

    # Reference lines at 50 (dashed) and at 30 / 70.
    ax.axhline(y=50, linewidth=0.85, linestyle='dashed', color='0.5')
    ax.axhline(y=30, linewidth=0.75, color='0.5')
    ax.axhline(y=70, linewidth=0.75, color='0.5')

    _show_figure(fig, ax)
Пример #48
0
# -*- coding: utf-8 -*-

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
from deepy.networks import AutoEncoder
from deepy.layers import RNN, Dense
from deepy.trainers import SGDTrainer, LearningRateAnnealer

from util import get_data, VECTOR_SIZE, SEQUENCE_LENGTH

# Width of the recurrent hidden state shared by the encoder and the decoder.
HIDDEN_SIZE = 50

# The trained parameters are written next to this script, under models/.
model_path = os.path.join(os.path.dirname(__file__), "models", "rnn1.gz")

if __name__ == '__main__':
    model = AutoEncoder(input_dim=VECTOR_SIZE, input_tensor=3)

    # Encoder: an RNN taking a sequence in and producing one output.
    encoder = RNN(hidden_size=HIDDEN_SIZE,
                  input_type="sequence",
                  output_type="one")
    model.stack_encoders(encoder)

    # Decoder: an RNN unrolled for SEQUENCE_LENGTH steps, followed by a
    # softmax Dense layer of size VECTOR_SIZE.
    decoder = RNN(hidden_size=HIDDEN_SIZE,
                  input_type="one",
                  output_type="sequence",
                  steps=SEQUENCE_LENGTH)
    output_layer = Dense(VECTOR_SIZE, 'softmax')
    model.stack_decoders(decoder, output_layer)

    # Train with SGD under a learning-rate annealing controller, then save.
    trainer = SGDTrainer(model)
    annealer = LearningRateAnnealer(trainer)
    trainer.run(get_data(), controllers=[annealer])

    model.save_params(model_path)

Пример #49
0
        #pdb.set_trace()

        return weighted_population[-1]


if __name__ == "__main__":
    #TODO: Fix main method so we can use it to test the GA
    start_val = 100000
    symbol = "GOOG"

    #In-sample period
    dates = [dt.datetime(2011, 1, 1), dt.datetime(2011, 12, 31)]

    #Benchmark
    benchmark_df = util.get_data([symbol],
                                 pd.date_range(dates[0], dates[1]),
                                 addSPY=False).dropna()

    #Benchmark trades
    benchmark_trades_df = pd.DataFrame(
        data=[(benchmark_df.index.min(), symbol, "BUY", 1000),
              (benchmark_df.index.max(), symbol, "SELL", 1000)],
        columns=['Date', 'Symbol', 'Order', 'Shares'])
    benchmark_trades_df.set_index('Date', inplace=True)

    gen_alg = GeneticAlgorithm(symbol=symbol, dates=dates, start_val=start_val)
    params, sharpe_ratio = gen_alg.start_ga()

    pdb.set_trace()
    manual_strat = manstrat.ManualStrategy()
    trades_df = manual_strat.testPolicy(symbol,
Пример #50
0
def compute_portvals(orders_file="./orders/orders.csv", start_val=1000000):
    """Simulate an order book and return the daily portfolio value.

    The CSV is indexed by its ``Date`` column; the remaining columns are read
    by position as (symbol, order side, number of shares).  Any side other
    than ``"BUY"`` is treated as a sale.  The order book, the head of the
    per-day position changes and the summary statistics are printed along
    the way.

    :param orders_file: path of the orders CSV.
    :param start_val: starting cash.
    :return: single-column DataFrame (``port_val``) holding the total
        portfolio value for every row of the price table.
    :raises KeyError: if an order is dated on a day missing from the price
        table.
    """
    # this is the function the autograder will call to test your code
    orders_df = pd.read_csv(orders_file,
                            index_col='Date',
                            parse_dates=True,
                            na_values=['nan'])

    # Sort by index so orders are processed chronologically.
    orders_df = orders_df.sort_index()
    print("ORDER BOOK {}".format(orders_df))

    # Make sure the index really is datetime-typed.
    orders_df.index = pd.to_datetime(orders_df.index)

    # The simulation spans the first through the last order date.
    sd = orders_df.index.values[0]
    ed = orders_df.index.values[-1]

    # Distinct symbols in order of first appearance.
    syms = orders_df.iloc[:, 0].drop_duplicates().tolist()

    dates = pd.date_range(sd, ed)
    prices_all = get_data(syms, dates)  # automatically adds SPY

    # Cash is modelled as an asset whose price is always 1.
    prices_all['Cash'] = np.ones(prices_all.shape[0])

    # Same shape as the price table, zeroed: holds the per-day change in
    # units first and the running holdings after the cumulative sum below.
    units_all = prices_all * 0.0

    # The starting cash is "deposited" on the first day (Cash is last column).
    units_all.iloc[0, -1] = start_val

    for order_date, order_row in orders_df.iterrows():
        stock_name = order_row.iloc[0]
        side = order_row.iloc[1]
        order_units = order_row.iloc[2]
        order_price = prices_all.loc[order_date, stock_name]
        # +1 adds shares and spends cash; -1 removes shares and adds cash.
        direction = 1 if side == "BUY" else -1
        units_all.loc[order_date, stock_name] += order_units * direction
        units_all.loc[order_date,
                      "Cash"] -= order_units * order_price * direction

    print(units_all.head())

    # Running total of the daily changes = holdings on each day.
    # skipna=False keeps the row-by-row behaviour: a missing value carries
    # forward instead of being skipped.
    units_all = units_all.cumsum(skipna=False)

    # Value of every position, then the portfolio total per day.
    port_vals = prices_all * units_all
    port_vals["port_val"] = port_vals.sum(axis=1)

    # Daily return relative to the previous row; the first day is defined
    # as 0.  Built as a separate Series to avoid chained assignment.
    total_value = port_vals["port_val"]
    daily_returns = total_value / total_value.shift(1) - 1
    daily_returns.iloc[0] = 0
    port_vals["daily_returns"] = daily_returns

    def compute_pf_stats(df, rfr, sf):
        """Return (cumulative return, mean daily return, std of daily returns, Sharpe ratio)."""
        value = df["port_val"]
        cr = (value.iloc[-1] - value.iloc[0]) / value.iloc[0]

        # The first day's return is a placeholder, so it is excluded.
        returns = df["daily_returns"].iloc[1:]
        adr = returns.mean()
        sddr = returns.std()

        # Sharpe ratio scaled by sqrt(sf), sf being the samples per year.
        sr = (sf**(1.0 / 2.0) * (adr - rfr)) / sddr

        return cr, adr, sddr, sr

    cr, adr, sddr, sr = compute_pf_stats(port_vals, rfr=0, sf=252)

    # Only the total value column is returned, kept as a DataFrame.
    port_val = port_vals[["port_val"]]

    print("cr, adr, sddr, sr {} {} {} {}".format(cr, adr, sddr, sr))
    return port_val
Пример #51
0
    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Query the trained learner over [sd, ed] and return the resulting trades.

        :param symbol: ticker to trade.
        :param sd: start date of the test period.
        :param ed: end date of the test period.
        :param sv: starting value; not used by this method.
        :return: DataFrame with one column (``symbol``) holding, per row of
            the price table, the change in holdings.  Holdings are always
            +1000, -1000 or 0 shares.
        """
        # The first `lookback` rows are skipped when building features.
        lookback = 70

        # compute the technical indicators (70-row and 14-row windows)
        sym = [symbol]
        momentum3, sma_ratio3, bbp3 = ind.indicators(sd, ed, sym, 70, False)
        momentum14, sma_ratio14, bbp14 = ind.indicators(sd, ed, sym, 14, False)

        # Feature array: six indicator columns plus one combined-state column.
        # NOTE(review): rows are selected by position, which assumes the
        # indicator frames are not integer-indexed -- confirm.
        row = momentum3.values[:, 0].size - lookback
        features = np.zeros((row, 7))
        indicator_frames = (momentum3, sma_ratio3, bbp3,
                            momentum14, sma_ratio14, bbp14)
        for col, frame in enumerate(indicator_frames):
            features[:, col] = frame[symbol].iloc[lookback:].values

        # Discretize every indicator against 9 evenly spaced edges spanning
        # its own min..max.
        fmin = features.min(axis=0)
        fmax = features.max(axis=0)
        for i in range(6):
            bins = np.linspace(fmin[i], fmax[i], 9)
            features[:, i] = np.digitize(features[:, i], bins)

        # Concatenate the six bin digits into one integer per row.
        for i in range(row):
            features[i, 6] = int(
                "".join(str(int(value)) for value in features[i, :6]))

        # build a set of trades
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
        # Start with no holdings on every row of the price table.  The frame
        # is built explicitly because writing through DataFrame.values is not
        # guaranteed to reach the frame.
        holdings = pd.DataFrame(0.0, index=prices_all.index, columns=[symbol])

        pre_action = 0
        for i in range(row):
            # State = combined feature digits followed by the previous action.
            state = int(str(int(features[i, 6])) + str(int(pre_action)))
            cur_action = self.learner.querysetstate(state)
            if cur_action == 1:  # buy and long 1000
                position = 1000
            elif cur_action == 2:  # sell and short 1000
                position = -1000
            else:  # no holding
                position = 0
            holdings.iloc[lookback + i, 0] = position
            pre_action = cur_action

        # Trades are the row-to-row change in holdings; the first row keeps
        # its holding value because it has no predecessor to diff against.
        trades = holdings.diff()
        trades.iloc[0] = holdings.iloc[0]
        if self.verbose:
            print(type(trades))  # it better be a DataFrame!
            print(trades)
            print(prices_all)
        return trades
Пример #52
0
    verbose = False
    start_val = 100000
    benchmarkSymbol = "JPM"
    commission = 0.00
    impact = 0.0
    num_shares = 1000


    print "In-sample training period"
    start_date = dt.datetime(2008, 1, 1,0,0)
    end_date = dt.datetime(2009, 12, 31,0,0)

    # Create benchmark data series. Benchmark is a portfolio starting with
    # $100,000, investing in 1000 shares of symbol and holding that position
    dates = pd.date_range(start_date, end_date)
    prices_all = get_data([benchmarkSymbol], dates).dropna()
    indexDates = prices_all.index
    zeroes = [0.0] * len(prices_all)
    benchmarkTrades = pd.DataFrame({"Date": indexDates, benchmarkSymbol: zeroes})
    benchmarkTrades = benchmarkTrades.set_index('Date')
    benchmarkTrades.iloc[0][0] = 1000 #set to buy LONG on day1
    benchmarkTrades.iloc[(len(prices_all)-1)][0] = -1000  # set to sell all on the last day

    benchmarkOrders = pd.Series(index=indexDates, data=zeroes)
    benchmarkOrders.iloc[0] = 1.0  # set to buy LONG on day1
    benchmarkOrders.iloc[(len(prices_all)-1)] = -1.0  # set to sell all on the last day
    # Train and test a StrategyLearner

    stl = StrategyLearner(num_shares=num_shares, impact=impact,
                          commission=commission, verbose=True,
                          num_states=3000, num_actions=3)
Пример #53
0
def test_dot_real(data_dict):
    """Benchmark ``mx.nd.dot`` with CSR versus dense left operands.

    The dataset is fetched with ``get_data`` if it is not already on disk,
    its first 2000 lines are copied to a "mini" file, and for every
    configured batch size the average cost reported by ``measure_cost`` is
    printed for both storage types.

    :param data_dict: benchmark description. Keys read here: 'data_name',
        'url', 'data_origin_name', 'feature_dim', 'm', 'data_mini' and
        'batch_size' (an iterable of batch sizes).
    """
    import itertools

    def get_iter(path, data_shape, batch_size):
        """Return an iterator over a LibSVM file in batches."""
        data_train = mx.io.LibSVMIter(data_libsvm=path,
                                      data_shape=data_shape,
                                      batch_size=batch_size)
        return iter(data_train)

    data_dir = os.path.join(os.getcwd(), 'data')

    path = os.path.join(data_dir, data_dict['data_name'])
    if not os.path.exists(path):
        get_data(data_dir, data_dict['data_name'], data_dict['url'],
                 data_dict['data_origin_name'])
        assert os.path.exists(path)

    k = data_dict['feature_dim']
    m = data_dict['m']
    density = estimate_density(path, data_dict['feature_dim'])

    mini_path = os.path.join(data_dir, data_dict['data_mini'])
    if not os.path.exists(mini_path):
        # Copy the first 2000 lines in-process instead of shelling out to
        # `head`: no dependence on a Unix shell or on repr()-style quoting
        # of the paths.
        with open(path, 'rb') as src, open(mini_path, 'wb') as dst:
            dst.writelines(itertools.islice(src, 2000))
        assert os.path.exists(mini_path)

    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Running Benchmarking on %r data" % data_dict['data_mini'])
    for batch_size in data_dict['batch_size']:
        print("batch_size is %d" % batch_size)
        data_shape = (k, )
        train_iter = get_iter(mini_path, data_shape, batch_size)
        weight = mx.nd.random_uniform(low=0, high=1, shape=(k, m))

        # Materialise every batch once in both storage formats so that the
        # timed section below measures only the dot product.
        csr_data = []
        dns_data = []
        for _ in train_iter:
            data = train_iter.getdata()
            csr_data.append(data)
            dns_data.append(data.tostype('default'))

        num_repeat = 5
        costs = []
        for batches in (csr_data, dns_data):
            weight.wait_to_read()
            cost = 0.
            for d_batch in batches:
                d_batch.wait_to_read()
                cost += measure_cost(num_repeat, mx.nd.dot, d_batch, weight)
            costs.append(cost / len(batches))
        t_sparse = costs[0]
        t_dense = costs[1]
        ratio = t_dense / t_sparse
        print('density(%)\tn\tm\tk\tt_dense/t_sparse\tt_dense\tt_sparse')
        fmt = "%0.4f\t\t%d\t%d\t%d\t%0.2f\t\t\t%0.4f\t%0.6f"
        print(fmt %
              (density * 100, batch_size, m, k, ratio, t_dense, t_sparse))
Пример #54
0
    def addEvidence(self, symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 12, 31), sv=10000):

        """Trains self.QLearner for trading a single symbol.

        Each epoch replays the whole date range once, feeding the learner a
        discretised indicator state and the reward of the previous position,
        then scores the resulting orders with the market simulator.

        Inputs / Parameters:
            symbol: The stock symbol to act on
            sd: A datetime object that represents the start date
            ed: A datetime object that represents the end date
            sv: Start value of the portfolio which contains only the one symbol

        Returns nothing. When self.verbose is set, the per-epoch cumulative
        returns are plotted and saved to 'result.png'.
        """

        # Get adjusted close prices for the given symbol on the given date range
        dates = pd.date_range(sd, ed)
        prices_all = get_data([symbol], dates) #includes SPY due to util function
        pricesDF = prices_all[[symbol]] # only the symbol
        # Get features and thresholds
        indicatorsDF = self.getIndicators(pricesDF[symbol])
        thresholds = self.setThresholds(indicatorsDF, self.num_steps)
        cum_returns = []

        for epoch in range(1, self.epochs + 1):
            # Initial position is holding nothing
            position = self.CASH
            # Create a series that captures order signals based on actions taken.
            # The dtype is explicit because newer pandas no longer defaults
            # an empty Series to float64.
            orders = pd.Series(index=indicatorsDF.index, dtype=float)
            # Iterate over the data by date
            for day, date in enumerate(indicatorsDF.index):
                # Get a state
                state = self.getState(indicatorsDF.loc[date], thresholds)
                # On the first day, get an action without updating the Q-table
                if date == indicatorsDF.index[0]:
                    action = self.QLearner.querysetstate(state)
                    newPos = float(action - 1)
                # On other days, calculate the reward and update the Q-table
                else:
                    # NOTE(review): prev_price is looked up positionally in
                    # pricesDF while `day` counts rows of indicatorsDF; this
                    # assumes both share the same row order - confirm.
                    prev_price = pricesDF[symbol].iloc[day - 1]
                    curr_price = pricesDF[symbol].loc[date]
                    reward = self.calcDailyReward(prev_price,curr_price, position)
                    action = self.QLearner.query(state, reward)
                    newPos = float(action - 1)

                # Add new_pos to orders, update current position
                orders.loc[date] = newPos
                position = newPos

            # Get the portfolio values for this epoch's orders.
            # NOTE(review): this rebinds pricesDF to the simulator's price
            # frame, which the next epoch then reads - confirm it still has
            # the `symbol` column with the same rows.
            portvals, tradesDF, holdingsDF, pricesDF = marketsimcode.compute_portvals_single_stock(ordersDF=orders, symbol=symbol, start_val=sv, commission=self.commission, impact=self.impact, num_shares = self.num_shares)
            cum_return = marketsimcode.compute_portfolio_stats(portvals)[0]
            cum_returns.append(cum_return)

            # Check for convergence only after more than 20 epochs have run
            if epoch > 20:
                # Stop as soon as checkConvergence reports convergence
                if self.checkConvergence(cum_returns):
                    break

        if self.verbose:
            # Select the non-interactive backend BEFORE creating the figure;
            # switching after savefig (as before) had no effect on the plot.
            plt.switch_backend('Agg')
            plt.plot(cum_returns)
            plt.xlabel("Epoch")
            plt.ylabel("Cumulative return (%)")
            plt.savefig('result.png')
Пример #55
0
    def dU(beta):
        """Return X^T (logistic(X beta) - y) + beta / alpha.

        exp(z) / (1 + exp(z)) is the logistic function of z = X beta; X, y
        and alpha come from the enclosing scope.
        """
        return mp.dot(X.T, (mp.exp(mp.dot(X,beta))/(1+mp.exp(mp.dot(X,beta))) - y)) + beta/alpha

    # One column of X per coefficient.
    D = X.shape[1]
    # Current sample, kept as a (D, 1) column; starts at zero.
    q = mp.zeros((D, 1), dtype=mp.float32)
    # Row i of `out` receives the flattened sample produced at iteration i.
    out = mp.zeros((n_iter, D), dtype=mp.float32)
    for i in range(n_iter):
        # Each hmc call takes the previous sample and returns the next one.
        q = hmc(U, dU, epsilon, L, q)
        out[i,:] = mp.ravel(q)
    return out

# Benchmark driver: runs lr_hmc inside the context selected by args.mode,
# times the full run and prints the timing as JSON.
with cpu() if args.mode == 'cpu' else gpu(0):
    with open('params.json') as params_file:
        out = {}
        # Keys read below: 'epsilon', 'n_leaps', 'alpha', 'n_iter', 'burn_in'.
        params = json.load(params_file)
        X_train, y_train, X_test, y_test = get_data()
        X_train = mp.array(X_train)
        y_train = mp.array(y_train)
        X_test = mp.array(X_test)
        y_test = mp.array(y_test)
        # Add a trailing axis to y_train before it is passed to lr_hmc.
        y_train = mp.expand_dims(y_train, 1)
        # Single-iteration call that is deliberately left out of the timing.
        z = lr_hmc(y_train, X_train, params['epsilon'], params['n_leaps'], params['alpha'], 1)  # Warm-up
        t = time.perf_counter()
        z = lr_hmc(y_train, X_train, params['epsilon'], params['n_leaps'], params['alpha'], params['n_iter'])
        t = time.perf_counter() - t
        # Elapsed seconds of the timed run, keyed by the execution mode.
        out[f'minpy-{args.mode}'] = t
        # Average the samples after discarding the first 'burn_in' rows.
        coef_ = mp.mean(z[params['burn_in']:], 0)
        # Fraction of test rows whose thresholded prediction equals y_test.
        acc = mp.mean((sigmoid(mp.dot(X_test, coef_)) > 0.5) == y_test)[0]
        # Sanity check on the sampler's output, not part of the timing.
        assert acc > 0.8
        print(json.dumps(out))
Пример #56
0
import os
import math
import matplotlib.pyplot as plt
from util import get_data, plot_data


def tech_indicators(syms=['JPM'],
                    s_date=dt.datetime(2008, 1, 1),
                    e_date=dt.datetime(2009, 12, 31)):
    """Compute normalised-price technical indicators for the given symbols.

    Prices are normalised by their first row, then a 7-row simple moving
    average ratio, its crossover signal and a momentum series are derived.

    :param syms: list of ticker symbols to load.
    :param s_date: first date of the range (datetime).
    :param e_date: last date of the range (datetime).

    NOTE(review): the visible code ends after computing the momentum frame
    and returns nothing; the remainder of the function is missing from this
    snippet.
    """
    # Copy so the shared default list can never be mutated by a callee.
    symbols = list(syms)
    lookback = 7

    dfprices = get_data(symbols, pd.date_range(s_date, e_date))
    # The loaded frame carries a 'SPY' column that is not wanted here.
    dfprices = dfprices.drop('SPY', axis=1)
    # Normalise every series to 1.0 on the first row.
    dfprices = dfprices / dfprices.iloc[0]

    # Rolling mean over `lookback` rows; NaN until the window is full.
    dfsma_prices = dfprices.rolling(window=lookback,
                                    min_periods=lookback).mean()

    # Price relative to its moving average (>= 1.0 means at or above it).
    dfsma = dfprices / dfsma_prices

    # Crossover signal: +1 on the row where price/SMA rises to >= 1.0,
    # -1 where it drops below, 0 otherwise (first row forced to 0).
    dfsma_cross = pd.DataFrame(0, index=dfsma.index, columns=dfsma.columns)
    dfsma_cross[dfsma >= 1.0] = 1
    dfsma_cross[1:] = dfsma_cross.diff()
    dfsma_cross.iloc[0] = 0

    # Relative price change versus `lookback - 1` rows earlier.
    dfmomentum = (dfprices / dfprices.shift(lookback - 1)) - 1
def compute_portvals(orders_df,
                     start_val=1000000,
                     commission=9.95,
                     impact=0.005):
    """Simulate a list of orders and return the daily portfolio value.

    :param orders_df: DataFrame indexed by date with a 'Symbol' column. For
        each row the first three columns are read positionally as ticker,
        order type ("BUY" or "SELL") and number of shares. The input is not
        modified.
    :param start_val: starting cash.
    :param commission: fixed fee charged per order.
    :param impact: fraction of the traded value charged as market impact.
    :return: single-column DataFrame ("portfolio_totals") indexed like the
        price data, one row per date between the first and last order.
    """
    # sort_index returns a copy, so the caller's frame is left untouched.
    orders = orders_df.sort_index()
    stocks = orders['Symbol'].unique().tolist()
    dates = pd.date_range(orders.index[0], orders.index[-1])
    orders = orders.ffill().bfill()

    # Prices, gap-filled forward then backward; cash is priced at 1.0 so it
    # can be treated like any other asset column.
    data = get_data(stocks, dates)
    data = data.ffill().bfill()
    data["cash_change"] = 1.0

    # Per-day change in shares held for each asset, plus change in cash.
    share_chg = pd.DataFrame(0.0, index=data.index, columns=data.columns)
    for idx, row in orders.iterrows():
        # .iloc keeps the access positional without relying on the
        # deprecated integer fallback of Series.__getitem__.
        ticker, ord_type, shares = row.iloc[0], row.iloc[1], row.iloc[2]
        value = data.loc[idx, ticker] * shares
        cost = value * impact + commission

        if ord_type == "SELL":
            share_chg.loc[idx, ticker] -= shares
            share_chg.loc[idx, "cash_change"] += value - cost
        elif ord_type == "BUY":
            share_chg.loc[idx, ticker] += shares
            share_chg.loc[idx, "cash_change"] -= value + cost

    # Seed the cash column (last column) with the starting value, then
    # accumulate the daily changes into running holdings. skipna=False keeps
    # NaN propagation identical to a row-by-row running sum.
    share_chg.iloc[0, -1] += start_val
    holdings = share_chg.cumsum(skipna=False)

    # Portfolio value = sum over assets of price * holding (cash price is 1).
    port = (data * holdings).sum(axis=1)
    return port.to_frame("portfolio_totals")
Пример #58
0
def compute_portvals(
    symbol,
    orders,
    start_val=1000000,
    commission=9.95,
    impact=0.005,
):
    """
    Computes the portfolio values for a single-symbol order history.

    :param symbol: The ticker the orders refer to
    :type symbol: str
    :param orders: Date-indexed frame whose first column holds the signed number of shares traded that day (positive = buy, negative = sell, 0 = no trade). It is not modified.
    :type orders: pandas.DataFrame
    :param start_val: The starting value of the portfolio
    :type start_val: int
    :param commission: The fixed amount in dollars charged for each transaction (both entry and exit)
    :type commission: float
    :param impact: The amount the price moves against the trader compared to the historical data at each transaction
    :type impact: float
    :return: the value of the portfolio for each trading day from the first to the last order date, inclusive.
    :rtype: pandas.Series
    """
    # Work on a sorted copy so the caller's frame is not reordered in place.
    orders = orders.sort_index(ascending=True)

    start_date = orders.index.min()
    end_date = orders.index.max()

    symbols = [symbol]
    columns = symbols + ['Cash']

    # Prices - [Date, Symbol, Cash]; cash is priced at 1 so it can be
    # multiplied like any other holding.
    dates = pd.date_range(start_date, end_date)
    prices = get_data(symbols, dates)
    prices = prices[symbols].copy()
    prices['Cash'] = 1

    # Trades - [Date, Symbol, Cash] - captures changes for every day
    trades = pd.DataFrame(index=prices.index,
                          columns=columns,
                          data=np.zeros(prices.shape))

    # Populate "trades" dataframe
    for date, row in orders.iterrows():
        order = row.iloc[0]
        if order == 0:
            # No trade on this date: nothing changes hands, so no
            # commission or impact may be charged either.
            continue
        shares = abs(order)
        price = prices.loc[date, symbol]
        # `order` is signed, so one expression covers both buys and sells.
        trades.loc[date, symbol] += order
        trades.loc[date, 'Cash'] -= price * order

        # subtract transaction cost from cash
        trades.loc[date, 'Cash'] -= (commission + impact * price * shares)

    # Holdings - running total of trades, with the starting cash added to
    # the first row of the Cash (last) column.
    holdings = trades.copy()
    holdings.iloc[0, -1] += start_val
    holdings = holdings.cumsum()

    # portfolio value = (price * shares + cash) for each day
    values = holdings * prices
    portvals = values.sum(axis=1)
    return portvals
Пример #59
0
    def createIndicators(self):
        """Build the feature table and target action used by the learner.

        Prices for self.symbol are loaded over [self.start_date,
        self.end_date], indicators are computed on the full range, and rows
        earlier than start_date + 30 calendar days are then dropped.

        Returns:
            The trimmed price DataFrame extended with the columns
            'Price_Sma', 'Volatility', 'Momentum', 'BB_Ind',
            'MACD_Cross_Below', 'MACD_Cross_Above' and 'Action'. 'Action'
            is 1 where the 5-row-forward return exceeds self.threshold,
            -1 where it is below -self.threshold, and 0 otherwise.
        """
        dates = pd.date_range(self.start_date, self.end_date)
        price = ut.get_data(self.symbol, dates, addSPY = True)
        price = price.drop(['SPY'], axis=1)

        # Compute the indicators on the full range so the look-back windows
        # are already populated when the leading rows are dropped below.
        sma_ind = ind.get_price_sma_ind(price, 20)
        momentum_ind = ind.getMomentumInd(price, 14)
        bb_ind = ind.getBBInd(price, 14)
        macd_ind = ind.getMACDInd(price)
        vol_ind = ind.volatility(price, 14)

        # Drop the first 30 calendar days after creating the indicators.
        cutoff = self.start_date + dt.timedelta(days=30)
        price = price.loc[price.index >= cutoff]
        sma_ind = sma_ind.loc[sma_ind.index >= cutoff]
        vol_ind = vol_ind.loc[vol_ind.index >= cutoff]
        momentum_ind = momentum_ind.loc[momentum_ind.index >= cutoff]
        bb_ind = bb_ind.loc[bb_ind.index >= cutoff]
        macd_ind = macd_ind.loc[macd_ind.index >= cutoff]

        macd_sigal_diff = ind.getMACDHistogramInd(price)

        def crossing_signal(mask):
            """Return +1 where `mask` turns on, -1 where it turns off, else 0."""
            signal = pd.DataFrame(0, index=macd_ind.index, columns=macd_ind.columns)
            signal[mask] = 1
            signal[1:] = signal.diff()
            # .iloc replaces the removed .ix indexer; the first row has no
            # predecessor, so it is forced to 0.
            signal.iloc[0] = 0
            return signal

        # macd cross below signal = sell
        macd_cross_below_signal = crossing_signal(macd_sigal_diff < 0)
        # macd cross above signal = buy
        macd_cross_above_signal = crossing_signal(macd_sigal_diff > 0)

        # Bollinger crossovers (upper = sell, lower = buy). Their results
        # are not added to the feature table at present; the calls are kept
        # as in the original.
        bb_upper_cross_signal = ind.getBBUpperCross(price,20)
        bb_lower_cross_signal = ind.getBBLowerCross(price,20)

        # Forward return over 5 rows. It must be computed before the feature
        # columns are added to `price`. The last row is set to 0 (positional
        # access via .iloc, replacing the removed .ix).
        daily_rets = ((price.shift(-5)/price) - 1)
        daily_rets.iloc[-1] = 0

        # Feature columns. The MACD value, the Bollinger crossings and the
        # price/SMA crossover are intentionally left out.
        price['Price_Sma'] = sma_ind
        price['Volatility'] = vol_ind
        price['Momentum'] = momentum_ind
        price['BB_Ind'] = bb_ind
        price['MACD_Cross_Below'] = macd_cross_below_signal
        price['MACD_Cross_Above'] = macd_cross_above_signal

        # Discretise the forward return into sell (-1) / hold (0) / buy (1).
        daily_ret_classes = pd.DataFrame(0, index=daily_rets.index, columns=daily_rets.columns, dtype = int)
        Y_buy = self.threshold
        Y_sell = -1*self.threshold
        daily_ret_classes[daily_rets > Y_buy] = 1
        daily_ret_classes[daily_rets < Y_sell] = -1

        price['Action'] = daily_ret_classes
        return price
Пример #60
0
    def make_idkmer_vec(self, data, hs, non_hs):
        """Make IDKmer vector.

        For every sequence in `data` and every k in use, two values are
        produced: id_x_s of the sequence's k-mer vector against the summed
        positive-set vector and against the summed negative-set vector,
        each rounded to 3 decimals.

        :param data: FASTA file to be processed.
        :param hs: Positive FASTA file.
        :param non_hs: Negative FASTA file.
        :return: One list per sequence, holding a (positive, negative)
            pair of values for each k.
        """
        from nacutil import make_kmer_list
        from nacutil import diversity
        from nacutil import id_x_s

        # Fixed options forwarded to make_kmer_vector. This local `upto` is
        # unrelated to self.upto, which only selects the set of k values.
        rev_kmer_list, upto, revcomp, normalize = [], False, False, False

        pos_s_list = get_data(hs)
        neg_s_list = get_data(non_hs)
        if self.upto is False:
            k_list = [self.k]
        else:
            k_list = list(range(1, self.k+1))

        # Calculate the standard source S vector for every k: column-wise
        # sums of the k-mer vectors over the positive / negative sets.
        kmer_lists = []
        pos_s_vec, neg_s_vec = [], []
        diversity_pos_s, diversity_neg_s = [], []
        for k in k_list:
            kmer_list = make_kmer_list(k, self.alphabet)
            # Cached so the k-mer list is not rebuilt for every sequence.
            kmer_lists.append(kmer_list)

            temp_pos_s_vec = make_kmer_vector(pos_s_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize)
            temp_neg_s_vec = make_kmer_vector(neg_s_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize)

            temp_pos_s_vec = [sum(col) for col in zip(*temp_pos_s_vec)]
            temp_neg_s_vec = [sum(col) for col in zip(*temp_neg_s_vec)]

            pos_s_vec.append(temp_pos_s_vec)
            neg_s_vec.append(temp_neg_s_vec)

            diversity_pos_s.append(diversity(temp_pos_s_vec))
            diversity_neg_s.append(diversity(temp_neg_s_vec))

        # Calculate Diversity(X) and ID(X, S).
        sequence_list = get_data(data)
        vec = []

        for seq in sequence_list:
            temp_vec = []
            # Index the per-k reference vectors by their position in k_list.
            # The previous code reset k to 1 unconditionally (it tested the
            # local constant `upto`), so in up-to mode every k was compared
            # against the k=1 reference vectors.
            for idx, k in enumerate(k_list):
                kmer_vec = make_kmer_vector([seq], kmer_lists[idx], rev_kmer_list, k, upto, revcomp, normalize)

                temp_vec.append(round(id_x_s(kmer_vec[0], pos_s_vec[idx], diversity_pos_s[idx]), 3))
                temp_vec.append(round(id_x_s(kmer_vec[0], neg_s_vec[idx], diversity_neg_s[idx]), 3))

            vec.append(temp_vec)

        return vec