示例#1
0
class InsaneLearner(object):
    """Learner that delegates everything to a BagLearner of BagLearners.

    The outer BagLearner is constructed with BagLearner itself as the base
    learner; the options handed to each inner one select LinRegLearner with
    20 bags.
    """

    def __init__(self, verbose=False):
        """Build the nested ensemble.

        @param verbose: forwarded to both the outer and the inner BagLearners.
        """
        # Options dictionary the outer BagLearner receives for its base learner.
        inner_options = dict(
            learner=LinRegLearner,
            kwargs={},
            bags=20,
            verbose=verbose,
        )
        self.learner = BagLearner(BagLearner, inner_options, 20, False, verbose)

    def author(self):
        """Return the author's user id."""
        return 'sgarg96'

    def addEvidence(self, dataX, dataY):
        """
        @summary: Train the wrapped ensemble.
        @param dataX: X values of the training data
        @param dataY: Y values of the training data
        """
        self.learner.addEvidence(dataX, dataY)

    def query(self, points):
        """
        @summary: Ask the wrapped ensemble for predictions.
        @param points: query points, passed through unchanged (documented
        upstream as a numpy array with one query per row).
        @returns whatever the wrapped ensemble's query returns.
        """
        predictions = self.learner.query(points)
        return predictions
示例#2
0
 def __init__(self, verbose=False):
     """Create a BagLearner whose base learner is BagLearner itself.

     The options dict given to the outer learner selects LinRegLearner with
     20 bags for each inner learner; ``verbose`` is forwarded to both levels.
     """
     inner_options = dict(learner=LinRegLearner,
                          kwargs={},
                          bags=20,
                          verbose=verbose)
     self.learner = BagLearner(BagLearner, inner_options, 20, False, verbose)
示例#3
0
 def twentybags():
     """Fit a 20-bag BagLearner of RTLearners (leaf_size=1) and query it.

     Reads trainX, trainY and testX from the enclosing scope and returns the
     ensemble's predictions for testX.
     """
     options = {
         "learner": RTLearner,
         "kwargs": {"leaf_size": 1},
         "bags": 20,
         "boost": False,
         "verbose": False,
     }
     ensemble = BagLearner(**options)
     ensemble.addEvidence(trainX, trainY)
     predictions = ensemble.query(testX)
     return predictions
示例#4
0
 def __init__(self, verbose=False, impact=0):
     """Store the settings and create the bagged RTLearner ensemble.

     @param verbose: stored on the instance; the ensemble itself is always
     created with verbose=False.
     @param impact: stored on the instance as ``self.impact``.
     """
     self.verbose = verbose
     # Assigned once; the original repeated this assignment at the end.
     self.impact = impact
     self.learner = BagLearner(learner=RTLearner, kwargs={"leaf_size": 5},
                               bags=20, boost=False, verbose=False)
     # Window lengths, both fixed at 14.
     self.lookback = 14
     self.lookforward = 14
 def __init__(self, verbose=False, impact=0.0):
     """Record the settings and build a 20-bag ensemble of RTLearners.

     @param verbose: stored as ``self.verbose``; the ensemble is created with
     verbose=False regardless.
     @param impact: stored as ``self.impact``.
     """
     self.verbose, self.impact = verbose, impact
     self.N = 10
     ensemble_options = dict(learner=RTLearner,
                             kwargs={"leaf_size": 5},
                             bags=20,
                             boost=False,
                             verbose=False)
     self.learner = BagLearner(**ensemble_options)
 def rnd_name():
     """Run a 20-bag BagLearner over ``il_cobj`` and return its call counters.

     Seeds numpy and ``random`` with ``seed``, then swaps both seed functions
     for ``fake_seed`` / ``fake_rseed`` while the learner trains and answers a
     query. The seed functions are put back from ``tmp_numpy_seed`` /
     ``tmp_random_seed`` even when the learner raises, so a failure here no
     longer leaves the substitutes installed for later code.

     @returns the tuple (init_callcount_dict, add_callcount_dict,
     query_callcount_dict) read from ``il_cobj``.
     """
     np.random.seed(seed)
     random.seed(seed)
     # presumably the substitutes stop the code under test from reseeding;
     # their definitions are outside this snippet
     np.random.seed = fake_seed
     random.seed = fake_rseed
     try:
         learner = BagLearner(learner=il_cobj, kwargs={'verbose': False},
                              bags=20, boost=False, verbose=False)
         learner.addEvidence(trainX, trainY)
         # The prediction is discarded; only the call counters are returned.
         learner.query(testX)
     finally:
         np.random.seed = tmp_numpy_seed
         random.seed = tmp_random_seed
     return (il_cobj.init_callcount_dict,
             il_cobj.add_callcount_dict,
             il_cobj.query_callcount_dict)
 def twentybags():
     """Return testX predictions from a seeded 20-bag RTLearner ensemble.

     Seeds numpy and ``random`` with ``seed``, then swaps both seed functions
     for ``fake_seed`` / ``fake_rseed`` while the ensemble trains and is
     queried. The seed functions are put back from ``tmp_numpy_seed`` /
     ``tmp_random_seed`` even when the learner raises.
     """
     np.random.seed(seed)
     random.seed(seed)
     np.random.seed = fake_seed
     random.seed = fake_rseed
     try:
         learner20 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1},
                                bags=20, boost=False, verbose=False)
         learner20.addEvidence(trainX, trainY)
         q_rv = learner20.query(testX)
     finally:
         # Always undo the monkey-patch, on success and on failure alike.
         np.random.seed = tmp_numpy_seed
         random.seed = tmp_random_seed
     return q_rv
示例#8
0
    def __init__(self, verbose=False, impact=0.0):
        """Store the settings and create a 10-bag ensemble of RTLearners.

        @param verbose: stored as ``self.verbose``; the ensemble and its
        RTLearners are always created with verbose=False.
        @param impact: stored as ``self.impact``.
        """
        self.verbose = verbose
        self.impact = impact
        # Filled in later; nothing in the constructor sets them.
        self.symbol = None
        self.Ytrain = None

        # Single source for the leaf size (the original declared it in an
        # unused local and repeated the literal in the kwargs).
        leaf_size = 20
        self.learner = BagLearner(RTLearner,
                                  kwargs={"leaf_size": leaf_size,
                                          "verbose": False},
                                  bags=10,
                                  boost=False,
                                  verbose=False)
示例#9
0
 def onebag():
     """Fit a single-bag BagLearner of RTLearners (leaf_size=1).

     Reads trainX, trainY and testX from the enclosing scope.

     @returns a tuple (predictions for testX, the learner's author string).
     """
     options = {
         "learner": RTLearner,
         "kwargs": {"leaf_size": 1},
         "bags": 1,
         "boost": False,
         "verbose": False,
     }
     single = BagLearner(**options)
     single.addEvidence(trainX, trainY)
     predictions = single.query(testX)
     return predictions, single.author()
示例#10
0
def test_bagging(trainX,
                 trainY,
                 testX,
                 testY,
                 should_plot=False,
                 max_size=None):
    """Compare test RMSE of a plain DTLearner with bagged DTLearners.

    Leaf sizes 0 .. bound-1 are tried, where bound is a fifth of the number
    of training rows (integer division), optionally capped by ``max_size``.
    For every leaf size a lone DTLearner and BagLearners with 1, 10 and 25
    bags are trained on the training data and scored on the test data.

    @param should_plot: when true, the curves are saved to
    'dtl_bagging_fig.png'.
    @param max_size: optional upper limit on the number of leaf sizes tried.
    @returns None; the RMSE arrays are only used for the plot.
    """

    def _test_rmse(model):
        # Train on the training split and score on the test split.
        model.addEvidence(trainX, trainY)
        residual = testY - model.query(testX)
        return math.sqrt((residual**2).sum() / testY.shape[0])

    limit = trainX.shape[0] // 5
    if max_size is not None:
        limit = min(max_size, limit)

    bag_counts = [1, 10, 25]
    bagged = np.zeros((len(bag_counts), limit))
    leaf_sizes = np.arange(limit)
    plain = np.zeros((limit, ))

    # DTLearner without bagging
    for leaf in leaf_sizes:
        plain[leaf] = _test_rmse(DTLearner(leaf_size=leaf))

    # DTLearner with bagging
    for row, count in enumerate(bag_counts):
        for leaf in leaf_sizes:
            bagged[row][leaf] = _test_rmse(
                BagLearner(learner=DTLearner,
                           bags=count,
                           kwargs={'leaf_size': leaf}))

    if not should_plot:
        return

    # RMSE against leaf size: one curve for the plain learner, one per bag count
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(leaf_sizes, plain, label='DTLearner')
    for count, curve in zip(bag_counts, bagged):
        ax.plot(leaf_sizes, curve, label=f'bags={count}')

    ax.set_xlabel('Leaf Size', fontweight='bold')
    ax.set_ylabel('RMSE', fontweight='bold')

    plt.legend()
    plt.savefig('dtl_bagging_fig.png')
    plt.clf()
 def onebag():
     """Return predictions and author from a seeded single-bag ensemble.

     Seeds numpy and ``random`` with ``seed``, then swaps both seed functions
     for ``fake_seed`` / ``fake_rseed`` while the learner trains and is
     queried. The seed functions are put back from ``tmp_numpy_seed`` /
     ``tmp_random_seed`` even when the learner raises.

     @returns a tuple (predictions for testX, the learner's author string).
     """
     np.random.seed(seed)
     random.seed(seed)
     np.random.seed = fake_seed
     random.seed = fake_rseed
     try:
         learner1 = BagLearner(learner=RTLearner, kwargs={"leaf_size": 1},
                               bags=1, boost=False, verbose=False)
         learner1.addEvidence(trainX, trainY)
         q_rv = learner1.query(testX)
         a_rv = learner1.author()
     finally:
         # Always undo the monkey-patch, on success and on failure alike.
         np.random.seed = tmp_numpy_seed
         random.seed = tmp_random_seed
     return q_rv, a_rv
示例#12
0
def bgl_leaf_size_rmses(trainX,
                        trainY,
                        testX,
                        testY,
                        rmses_train,
                        rmses_test,
                        train_pct=0.6):
    """Plot in-sample vs out-of-sample RMSE of a 25-bag DTLearner ensemble.

    The first axis of the four data arrays indexes trials: trial t trains on
    trainX[t] / trainY[t] and is scored on both trainX[t] and testX[t].
    Leaf sizes run from 1 up to (but excluding) trainX.shape[1] // 5.

    Two figures are written:
      * 'bgl_leaf_sizes_rmses_v1.png': mean train RMSE, mean test RMSE and
        their difference, per leaf size.
      * 'dtl_bgl_gen_err_v1.png': (rmses_test - rmses_train) averaged over
        axis 0, minus the bagged equivalent.

    @param rmses_train, rmses_test: arrays averaged over axis 0 and compared
    against the bagged results; presumably DTLearner RMSEs with one column
    per leaf size (verify against the caller).
    @param train_pct: only used in the legend labels.

    NOTE(review): both x-limits use leaf_rng[-10], which raises IndexError
    when fewer than 10 leaf sizes are available.
    """

    def _rmse(actual, predicted, count):
        return math.sqrt(((actual - predicted)**2).sum() / count)

    trials = trainX.shape[0]
    leaf_rng = np.arange(1, trainX.shape[1] // 5)
    grid_shape = (trainX.shape[0], leaf_rng.shape[0])
    bag_rmses_train = np.zeros(grid_shape)
    bag_rmses_test = np.zeros(grid_shape)
    bags = 25

    for trial_idx in np.arange(trials):
        for leaf_idx, leaf_size in enumerate(leaf_rng):
            bgl = BagLearner(learner=DTLearner,
                             bags=bags,
                             kwargs=dict(leaf_size=leaf_size))
            bgl.addEvidence(trainX[trial_idx], trainY[trial_idx])

            in_sample_pred = bgl.query(trainX[trial_idx])
            out_sample_pred = bgl.query(testX[trial_idx])

            # Number of observations per trial on each side.
            n_train = trainY.shape[1]
            n_test = testY.shape[1]

            bag_rmses_train[trial_idx][leaf_idx] = _rmse(
                trainY[trial_idx], in_sample_pred, n_train)
            bag_rmses_test[trial_idx][leaf_idx] = _rmse(
                testY[trial_idx], out_sample_pred, n_test)

    mean_train = bag_rmses_train.mean(axis=0)
    mean_test = bag_rmses_test.mean(axis=0)

    # Figure 1: in-bag, out-of-bag and their gap.
    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.plot(leaf_rng, mean_train, label=f'IB ({train_pct*100:0.0f}%)')
    ax.plot(leaf_rng, mean_test, label=f'OOB ({(1-train_pct)*100:0.0f}%)')
    ax.plot(leaf_rng, mean_test - mean_train, label='OOB-IB', c='m')

    ax.set_xlim((1, leaf_rng[-10]))
    ax.set_xlabel('Leaf Size', fontweight='bold')
    ax.set_ylabel(f'RMSE (avg over {trials} trials)', fontweight='bold')
    ax.set_title('BagLearner Generalization Error', fontweight='bold')
    plt.legend()
    plt.savefig('bgl_leaf_sizes_rmses_v1.png')
    plt.clf()

    # Figure 2: how much larger the supplied generalization gap is than the
    # bagged one.
    ax = fig.add_subplot(111)
    dt_gen_err = rmses_test.mean(axis=0) - rmses_train.mean(axis=0)
    bag_gen_err = mean_test - mean_train
    ax.plot(leaf_rng, dt_gen_err - bag_gen_err, c='m')

    ax.set_xlim((1, leaf_rng[-10]))
    ax.set_xlabel('Leaf Size', fontweight='bold')
    ax.set_ylabel(f'RMSE (avg over {trials} trials)', fontweight='bold')
    ax.set_title('DTLearner - BagLearner Generalization Error',
                 fontweight='bold')
    plt.savefig('dtl_bgl_gen_err_v1.png')
    plt.clf()
示例#13
0
 def __init__(self, verbose=False):
     """Create 20 BagLearners, each over LinRegLearner with bags=20.

     @param verbose: forwarded to every BagLearner.
     """
     self.lrns = []
     for _ in range(20):
         self.lrns.append(
             BagLearner(learner=LinRegLearner, verbose=verbose, bags=20))
示例#14
0
def compare_dt_rt(trainX,
                  trainY,
                  testX,
                  testY,
                  should_plot=False,
                  max_size=None,
                  data_title=''):
    """Compare 10-bag ensembles of DTLearner and RTLearner across leaf sizes.

    Leaf sizes 0 .. bound-1 are tried, where bound is a fifth of the number
    of training rows (integer division), optionally capped by ``max_size``.
    For each leaf size and each learner type the squared correlation between
    prediction and testY, an AIC-style score and the standard deviation of
    the test residuals are recorded. Residuals are kept for leaf size 0 only.

    @param should_plot: when true, five PNG files are written (R-squared,
    AIC, STD, the training Y values and the DTLearner residuals).
    @param data_title: when non-empty, appended to every file name after an
    underscore.
    @returns None.
    """
    bound = trainX.shape[0] // 5
    if max_size is not None:
        bound = min(max_size, bound)

    # Row 0 holds DTLearner results, row 1 RTLearner results.
    resids = np.zeros((2, testX.shape[0]))
    rsqrs, aics, stds = (np.zeros((2, bound)) for _ in range(3))
    xrng = np.arange(bound)
    n, k = trainX.shape
    for i in xrng:
        opts = {'leaf_size': i}
        # Both ensembles are constructed before either one is trained.
        learners = [
            BagLearner(learner=base, kwargs=opts, bags=10)
            for base in (DTLearner, RTLearner)
        ]
        for j, learner in enumerate(learners):
            learner.addEvidence(trainX, trainY)
            predY = learner.query(testX)
            error = testY - predY
            if i == 0:
                resids[j] = error
            rsqrs[j][i] = np.corrcoef(predY, y=testY)[0, 1]**2
            # n and k come from the training data; the squared error from the
            # test data.
            aics[j][i] = 2 * k + n * np.log((error**2).sum() / n)
            stds[j][i] = np.std(error)

    if not should_plot:
        return

    suffix = f'_{data_title}' if data_title != '' else data_title

    fig = plt.figure()
    # The x axis is the loop index shifted by one.
    x = xrng + 1

    def _plot_pair(series, ylabel, stem):
        # One DT-vs-RT figure: draw, save, then clear for the next plot.
        ax = fig.add_subplot(111)
        ax.plot(x, series[0], label='DTLearner')
        ax.plot(x, series[1], label='RTLearner')

        ax.set_xlabel('Leaf Size', fontweight='bold')
        ax.set_ylabel(ylabel, fontweight='bold')
        ax.set_xbound(lower=1)

        plt.legend()
        plt.savefig(f'{stem}{suffix}.png')
        plt.clf()

    _plot_pair(rsqrs, 'R-Squared', 'dtl_rtl_rsqr')
    _plot_pair(aics, 'AIC', 'dtl_rtl_aic')
    _plot_pair(stds, 'STD', 'dtl_rtl_std')

    # Training targets in row order.
    ax = fig.add_subplot(111)
    ax.plot(range(trainX.shape[0]), trainY, 'bo')
    ax.set_ylabel('Y', fontweight='bold')
    plt.savefig(f'y_vals{suffix}.png')
    plt.clf()

    # DTLearner residuals (leaf size 0) against the true test values.
    ax = fig.add_subplot(111)
    ax.plot(testY, resids[0], 'bo')
    ax.set_ylabel('Residuals', fontweight='bold')
    ax.set_xlabel('Y', fontweight='bold')
    plt.savefig(f'resids{suffix}.png')
    plt.clf()
示例#15
0
class StrategyLearner(object):
    """Trading-signal learner backed by a 20-bag ensemble of RTLearners.

    Training labels are -1 / 0 / +1, derived from the return over the next
    ``lookforward`` rows; features are three indicators computed over
    ``lookback`` rows. Predictions are turned into trades that move the
    position between -1000 and +1000 shares.
    """

    def __init__(self, verbose=False, impact=0):
        """Store the settings and create the ensemble.

        @param verbose: when true, progress information is printed.
        @param impact: widens both return thresholds used for labelling
        (see get_pos).
        """
        self.verbose = verbose
        self.impact = impact
        self.learner = BagLearner(learner=RTLearner, kwargs={"leaf_size": 5},
                                  bags=20, boost=False, verbose=False)
        self.lookback = 14
        self.lookforward = 14

    def getFeatures(self, prices, symbol):
        """Build the feature frame for one symbol.

        Calls calc_sma_ratio, calc_bb_ratio and calc_momentum with the price
        frame and ``self.lookback``, renames each result's ``symbol`` column
        to 'smaratio', 'bbratio' and 'momentum', joins them and drops rows
        containing NaN.

        @returns the joined DataFrame.
        """
        # All indicators are computed before any of them is renamed in place.
        frames = [
            compute(prices, self.lookback)
            for compute in (calc_sma_ratio, calc_bb_ratio, calc_momentum)
        ]
        for frame, column in zip(frames, ("smaratio", "bbratio", "momentum")):
            frame.rename(columns={symbol: column}, inplace=True)

        sma_ratio, bbratio, momentum = frames
        X = sma_ratio.join([bbratio, momentum])
        X.dropna(inplace=True)

        if self.verbose:
            print(X.shape, X.columns)
        return X

    def get_trades(self, predY):
        """Convert a sequence of signals into share trades.

        A signal of -1 moves the position to -1000 shares and a signal of +1
        moves it to +1000, unless the position is already there. Every other
        signal value results in a trade of 0.

        @param predY: iterable of signals (numpy array or plain sequence).
        @returns list with one trade size per signal.
        """
        min_holdings, max_holdings = -1000, 1000
        net_holdings = 0
        trades = []

        for signal in predY:
            if signal == -1 and net_holdings > min_holdings:  # sell
                target = min_holdings
            elif signal == 1 and net_holdings < max_holdings:  # buy
                target = max_holdings
            else:
                target = net_holdings
            trades.append(target - net_holdings)
            net_holdings = target
        return trades

    def get_pos(self, x):
        """Map a forward return onto a label.

        @returns 1 when x exceeds 0.05 + impact, -1 when x is below
        -0.03 - impact, otherwise 0.
        """
        if x > 0.05 + self.impact:
            return 1
        if x < -0.03 - self.impact:
            return -1
        return 0

    def getTargetVariable(self, prices, symbol):
        """Compute forward returns and their labels.

        @returns (signals, returns): ``returns`` is the relative price change
        ``lookforward`` rows ahead with NaN rows dropped; ``signals`` is a
        one-column frame of get_pos labels for ``symbol``.
        """
        returns = (prices.shift(-1 * self.lookforward) / prices - 1)
        returns = returns.dropna()
        signals = returns[symbol].apply(self.get_pos).to_frame()
        return signals, returns

    def addEvidence(self, symbol="JPM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 12, 31),
                    sv=100000):
        """Train the ensemble on one symbol over [sd, ed].

        @param sv: accepted for interface compatibility; not used here.
        @returns (prices, trainX, trainY, returns) as used for training.
        """
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        if self.verbose:
            print("Prices loaded")

        trainX = self.getFeatures(prices, symbol)
        trainY, returns = self.getTargetVariable(prices, symbol)

        # Keep only the rows that have both features and a label.
        self.data = trainX.join(trainY, how='outer').dropna()
        trainX = self.data[trainX.columns]
        trainY = self.data[trainY.columns]
        assert trainY.shape[0] == trainX.shape[0]

        if self.verbose:
            print("Data shapes", trainX.shape, trainY.shape)
            print("Starting learning")

        self.learner.addEvidence(trainX.to_numpy(), trainY.to_numpy())
        return prices, trainX, trainY, returns

    def testPolicy(self,
                   symbol="JPM",
                   sd=dt.datetime(2010, 1, 1),
                   ed=dt.datetime(2011, 12, 31),
                   sv=100000):
        """Query the trained ensemble over [sd, ed] and derive trades.

        @param sv: accepted for interface compatibility; not used here.
        @returns DataFrame of trade sizes indexed like the feature frame.
        """
        if self.verbose:
            print("Testing policy")
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols

        testX = self.getFeatures(prices, symbol)
        predY = self.learner.query(testX.to_numpy())
        assert predY.shape[0] == testX.shape[0]

        if self.verbose:
            print(testX.shape, predY.shape)

        trades = self.get_trades(predY)
        return pd.DataFrame(trades, index=testX.index)

    def author(self):
        """Return the author's user id."""
        return 'sgarg96'
示例#16
0
# Keep only the rows whose index falls inside 2006-01-01 .. 2009-12-31.
train = train[train.index.isin(pd.date_range('2006-01-01', '2009-12-31'))]

# Feature matrix (three indicator columns) and target vector for the learner.
data_train_x = np.asarray(train[[
    "EMA_30_Price_Ratio",
    "EMA_200_Price_Ratio",
    "SPY_RSI_14_Days",
]])
data_train_y = np.asarray(train.ten_days_out.tolist())

# A single RTLearner(leaf_size=50) was tried here earlier and is disabled;
# the bagged ensemble below is what runs.
bag_learner = BagLearner(learner=RTLearner,
                         kwargs={"leaf_size": 50},
                         bags=15,
                         boost=False,
                         verbose=False)
bag_learner.addEvidence(data_train_x, data_train_y)
# Predictions are made on the same rows the ensemble was trained on.
Y = np.asarray(bag_learner.query(data_train_x))

# Index labels of the rows predicted at or beyond +/-1%.
to_buy = [train.index[pos] for pos in np.where(Y >= .01)[0]]
to_sell = [train.index[pos] for pos in np.where(Y <= -.01)[0]]

# add field showing ML predictions
# (ml_action is defined elsewhere; presumably it consults to_buy / to_sell)
train['ML_Prediction'] = train.index.map(ml_action)
示例#17
0
    plt.grid(True)
    plt.legend(loc="lower right")
    plt.title("RMSE of DTLearner with leaf size")
    plt.xlabel("Leaf size")
    plt.ylabel("RMSE")
    plt.xticks(np.arange(0, max_leaf, 5))
    plt.yticks(np.arange(0, 1, 0.1) * .01)
    plt.savefig("dt_learner_leaf.png", format="PNG")

    train_rmse = []
    test_rmse = []
    max_leaf = 50
    bags = 30

    for i in range(1, max_leaf + 1):
        learner = BagLearner(DTLearner, {"leaf_size": i}, bags)
        learner.addEvidence(trainX, trainY)
        train_rmse.append(calc_rmse(trainY, learner.query(trainX)))
        test_rmse.append(calc_rmse(testY, learner.query(testX)))

    plt.figure(figsize=(8, 6), dpi=80)
    plt.plot(np.arange(50)+1, train_rmse, label='Train RMSE', marker='o')
    plt.plot(np.arange(50)+1, test_rmse, label='Test RMSE', marker='o')
    plt.xlim(1, max_leaf)
    plt.grid(True)
    plt.legend(loc="lower right")
    plt.title("RMSE of BagLearner with leaf size")
    plt.xlabel("Leaf size")
    plt.ylabel("RMSE")
    plt.xticks(np.arange(0, max_leaf, 5))
    plt.yticks(np.arange(0, 1, 0.1) * .01)
  
# Keep only the rows whose index falls inside 2006-01-01 .. 2009-12-31.
train = train[train.index.isin(pd.date_range('2006-01-01', '2009-12-31'))]

# Feature matrix (three indicator columns) and target vector for the learner.
data_train_x = np.asarray(train[[
    "EMA_30_Price_Ratio",
    "EMA_200_Price_Ratio",
    "SPY_RSI_14_Days",
]])
data_train_y = np.asarray(train.ten_days_out.tolist())

# A single RTLearner(leaf_size=50) was tried here earlier and is disabled;
# the bagged ensemble below is what runs.
bag_learner = BagLearner(learner=RTLearner,
                         kwargs={"leaf_size": 50},
                         bags=15,
                         boost=False,
                         verbose=False)
bag_learner.addEvidence(data_train_x, data_train_y)
# Predictions are made on the same rows the ensemble was trained on.
Y = np.asarray(bag_learner.query(data_train_x))

# Index labels of the rows predicted at or beyond +/-1%.
to_buy = [train.index[pos] for pos in np.where(Y >= .01)[0]]
to_sell = [train.index[pos] for pos in np.where(Y <= -.01)[0]]

# add field showing ML predictions
# (ml_action is defined elsewhere; presumably it consults to_buy / to_sell)
train['ML_Prediction'] = train.index.map(ml_action)

示例#19
0
# NOTE: this fragment is Python 2 (print statements). shiny_DTLearner, kwargs,
# dataX and dataY are created before the visible part of the script.

# Train a random-tree learner with the same kwargs used earlier in the script.
shiny_RTLearner = RTLearner(**kwargs)
shiny_RTLearner.addEvidence(dataX, dataY)

# Dump both trees for visual comparison.
shiny_DTLearner.print_tree(shiny_DTLearner.tree)
shiny_RTLearner.print_tree(shiny_RTLearner.tree)

# Three two-feature query points shared by every query below.
row = [[3, 3], [0, 5], [1, 3]]

print "deterministically ", shiny_DTLearner.query(row)
print "randomly ", shiny_RTLearner.query(row)

# 20-bag ensemble of DTLearners with leaf_size 1; trained, then queried.
learner = BagLearner(learner=DTLearner,
                     kwargs={
                         "leaf_size": 1,
                         "verbose": False
                     },
                     bags=20,
                     boost=False,
                     verbose=False)
learner.addEvidence(dataX, dataY)
print learner.query(row)

# Same configuration with RTLearner as the base learner. Only the training
# step is visible here; any query happens past the end of this fragment.
learner = BagLearner(learner=RTLearner,
                     kwargs={
                         "leaf_size": 1,
                         "verbose": False
                     },
                     bags=20,
                     boost=False,
                     verbose=False)
learner.addEvidence(dataX, dataY)
示例#20
0
def dtbg_preds(trainX, trainY, testX, testY, train_pct=0.6):
    """Plot prediction variance of bagged DT vs RT learners against bag count.

    The first axis of the data arrays indexes trials: trial t trains on
    trainX[t] / trainY[t] and predicts trainX[t] and testX[t]. For bag counts
    1..9 (leaf_size fixed at 6) the variance of the test predictions across
    trials is averaged over the test observations and plotted for both
    learner types. The figure is saved as 'dtrt_pred_var_v1.png'.

    At most 10 trials are used. The prediction arrays are sized by the number
    of trials actually run, so unfilled all-zero rows no longer distort the
    variance when more than 10 trials are supplied, and fewer than 10 trials
    no longer cause an IndexError.

    @param testY: accepted for signature symmetry; not used.
    @param train_pct: accepted for signature symmetry; not used.
    @returns None.
    """
    bag_rng = np.arange(1, 10)
    leaf_size = 6
    trials = min(10, trainX.shape[0], testX.shape[0])

    train_shape = (trials, bag_rng.shape[0], trainX.shape[1])
    test_shape = (trials, bag_rng.shape[0], testX.shape[1])
    # NOTE(review): the train-side predictions are stored but never read
    # below; they are kept so the set of learner calls is unchanged.
    bagdt_preds_train = np.zeros(train_shape)
    bagdt_preds_test = np.zeros(test_shape)
    bagrt_preds_train = np.zeros(train_shape)
    bagrt_preds_test = np.zeros(test_shape)

    for trial_idx in np.arange(trials):
        for bag_idx, bag_size in enumerate(bag_rng):
            bgl = BagLearner(learner=DTLearner,
                             bags=bag_size,
                             kwargs=dict(leaf_size=leaf_size))
            bgl.addEvidence(trainX[trial_idx], trainY[trial_idx])
            bgl2 = BagLearner(learner=RTLearner,
                              bags=bag_size,
                              kwargs=dict(leaf_size=leaf_size))
            bgl2.addEvidence(trainX[trial_idx], trainY[trial_idx])

            train_predY = bgl.query(trainX[trial_idx])
            test_predY = bgl.query(testX[trial_idx])
            train2_predY = bgl2.query(trainX[trial_idx])
            test2_predY = bgl2.query(testX[trial_idx])

            bagdt_preds_train[trial_idx][bag_idx] = train_predY
            bagdt_preds_test[trial_idx][bag_idx] = test_predY
            bagrt_preds_train[trial_idx][bag_idx] = train2_predY
            bagrt_preds_test[trial_idx][bag_idx] = test2_predY

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Variance across trials (axis 0), then mean over observations.
    bagdt_mean_var = (bagdt_preds_test.std(axis=0)**2).mean(axis=1)
    bagrt_mean_var = (bagrt_preds_test.std(axis=0)**2).mean(axis=1)

    ax.plot(bag_rng, bagdt_mean_var, label='DT OOB Var')
    # The original labelled this RTLearner curve 'DT OOB Var' as well.
    ax.plot(bag_rng, bagrt_mean_var, label='RT OOB Var')

    ax.set_xlim((1, bag_rng[-1]))
    ax.set_xlabel('Bag Size', fontweight='bold')
    ax.set_ylabel('Prediction Variance', fontweight='bold')
    ax.set_title('DTLearner and RTLearner Prediction Variance',
                 fontweight='bold')
    plt.legend()
    plt.savefig('dtrt_pred_var_v1.png')
    plt.clf()
class StrategyLearner(object):
    """Trading-signal learner backed by a 20-bag ensemble of RTLearners.

    Features are price, moving average, distance to both Bollinger bands and
    momentum. Labels are -1 / 0 / +1 derived from the return five rows ahead.
    Predictions are turned into trades that keep the position at -1000, 0 or
    +1000 shares.
    """

    # Number of rows to look ahead when labelling; the same number of
    # trailing rows is never traded.
    _HORIZON = 5
    # Absolute size of a full long or short position, in shares.
    _MAX_POSITION = 1000

    # constructor
    def __init__(self, verbose=False, impact=0.0):
        """Store the settings and create the ensemble.

        @param verbose: when true, the training prices are printed.
        @param impact: widens the +/-2% return thresholds used for labelling.
        """
        self.verbose = verbose
        self.impact = impact
        self.N = 10
        self.learner = BagLearner(learner=RTLearner,
                                  kwargs={"leaf_size": 5},
                                  bags=20,
                                  boost=False,
                                  verbose=False)

    def _load_prices(self, symbol, sd, ed):
        """Load, sort and gap-fill the price frame for a single symbol."""
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        # sort_index returns a new frame; the original discarded the result.
        # Sorting comes first so the fills run in index order.
        prices_all = prices_all.sort_index(axis=0)
        prices_all = prices_all.ffill().bfill()
        return prices_all[syms]  # only portfolio symbols

    def _label_future_returns(self, prices, symbol):
        """Return a one-column frame 'y_values' of -1 / 0 / +1 labels.

        A row is labelled +1 when the price _HORIZON rows later is more than
        2% + impact higher, -1 when it is more than 2% + impact lower, and 0
        otherwise. The last _HORIZON rows have no future price and get 0.
        """
        close = prices[symbol]
        change = (close.shift(-self._HORIZON) - close) / close
        labels = pd.Series(0, index=prices.index)
        # Comparisons against NaN are False, so the trailing rows stay 0.
        labels[change > (0.02 + self.impact)] = 1
        labels[change < (-0.02 - self.impact)] = -1
        return labels.to_frame('y_values')

    def create_x_data(self, prices):
        """Build the five-column feature frame from a price frame.

        Columns: 'norm_price', 'sma', 'upper_band_diff', 'lower_band_diff'
        and 'momentum', all computed over a 10-row window. Missing values are
        forward-filled, then back-filled.
        """
        window = 10
        simple_moving_average = simple_moving_average_over_window(
            prices, window)
        simple_moving_std = simple_moving_std_over_window(prices, window)

        upper_bb, lower_bb = calculate_bollinger_bands(simple_moving_average,
                                                       simple_moving_std)
        momentum = calculate_momentum_over_window(prices, window)
        upper_diff_price = upper_bb - prices
        lower_diff_price = lower_bb - prices

        # The suffixes only keep the joins from colliding on the shared
        # symbol column; the column names are replaced right afterwards.
        x_data = (
            prices
            .join(simple_moving_average,
                  lsuffix='_Normalized Price', rsuffix='_SMA')
            .join(upper_diff_price, lsuffix='_', rsuffix='_upperband_diff')
            .join(lower_diff_price, lsuffix='_', rsuffix='_lowerband_diff')
            .join(momentum, lsuffix='_', rsuffix="_momentum")
        )

        x_data.columns = [
            'norm_price', "sma", "upper_band_diff", "lower_band_diff",
            "momentum"
        ]
        return x_data.ffill().bfill()

    def addEvidence(self, symbol="JPM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2008, 8, 1),
                    sv=10000):
        """Train the ensemble on one symbol over [sd, ed].

        @param sv: accepted for interface compatibility; not used here.
        """
        prices = self._load_prices(symbol, sd, ed)
        if self.verbose:
            print(prices)

        x_train = self.create_x_data(prices)
        y_train = self._label_future_returns(prices, symbol)

        self.learner.addEvidence(x_train.values, y_train.values)

    def testPolicy(self, symbol="JPM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):
        """Query the trained ensemble over [sd, ed] and derive trades.

        A prediction equal to -1 triggers a sell and one equal to 1 a buy;
        any other value leaves the position unchanged. The last _HORIZON rows
        are never traded.

        @param sv: accepted for interface compatibility; not used here.
        @returns DataFrame with one 'orders' column indexed like the prices.
        """
        prices = self._load_prices(symbol, sd, ed)
        x_test = self.create_x_data(prices)
        y_test = self.learner.query(x_test.values)

        tradable_rows = max(len(prices) - self._HORIZON, 0)
        trade_shares = []
        portfolio_position = 0
        for signal in y_test[:tradable_rows]:
            if signal == -1:
                order = self.do_sell_trade(portfolio_position)
            elif signal == 1:
                order = self.do_buy_trade(portfolio_position)
            else:
                order = 0
            trade_shares.append(order)
            portfolio_position = portfolio_position + order

        # Pad with zeros so there is exactly one order per price row, also
        # for inputs shorter than the horizon.
        trade_shares.extend([0] * (len(prices) - len(trade_shares)))
        df_trades = pd.DataFrame(data=trade_shares,
                                 index=prices.index,
                                 columns=['orders'])
        return df_trades

    def do_buy_trade(self, portfolio_position):
        """Return the order that moves the position to +1000 shares.

        Gives 1000 from flat, 2000 from short and 0 when already long.
        """
        return self._MAX_POSITION - portfolio_position

    def do_sell_trade(self, portfolio_position):
        """Return the order that moves the position to -1000 shares.

        Gives -1000 from flat, -2000 from long and 0 when already short.
        """
        return -self._MAX_POSITION - portfolio_position