Example #1
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000, n=1):
        # add your code to do learning here

        # Get price info
        syms = [symbol]
        adj_sd = sd - timedelta(days=30)
        dates = pd.date_range(adj_sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols

        # Calculate indicators
        df = add_all_indicators(prices, syms[0], add_helper_data=False)

        # Filter to time range
        df = df.loc[sd:ed, :].copy()

        # Get state df
        state_df = self.fit_transform_state(df, symbol, n_days=n)

        # Initialize Q-learner
        self._learner = QLearner(num_states=len(self._state_dict), num_actions=len(self._actions), dyna=self._dyna)

        # Fit learner
        for _ in range(self._epochs):
            self.fit_learner(state_df)
        return
Example #2
 def __init__(self, verbose=False, impact=0.0):
     self.verbose = verbose
     self.impact = impact
     self.learner = QLearner(num_states=90000, num_actions=2, dyna=0)
     self.mom_mean = 0
     self.mom_std = 0
     self.long_mom_mean = 0
     self.long_mom_std = 0
     self.bollinger_bands = None
     self.rsi = None
     self.momentum = None
     self.cr = -100000
Example #3
    def __init__(self, verbose=False, impact=0.0):
        self.verbose = verbose
        self.impact = impact

        self.learner = QLearner(num_states=1000, \
        num_actions = 3, \
        alpha = 0.2, \
        gamma = 0.9, \
        rar = 0.5, \
        radr = 0.99, \
        dyna = 0, \
        verbose = False)
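
A minimal sketch of driving a tabular learner like the one above, assuming the same QLearner module these examples import and the querysetstate/query interface they call; the toy 10-state environment is an illustration only, not part of any example on this page.

from QLearner import QLearner

learner = QLearner(num_states=10, num_actions=2, alpha=0.2, gamma=0.9,
                   rar=0.5, radr=0.99, dyna=0, verbose=False)
state = 5
action = learner.querysetstate(state)       # set the start state; no Q-update yet
for step in range(200):
    # toy environment: action 1 moves right, action 0 moves left; state 9 is the goal
    state = max(0, min(9, state + (1 if action == 1 else -1)))
    reward = 1.0 if state == 9 else -0.01
    action = learner.query(state, reward)    # update the Q-table and get the next action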
Example #4
def train_lunar_lander(env,
                       framework="pytorch",
                       hidden_layer_dimensions: List[int] = [128, 64],
                       use_dropout=False,
                       training_episode_count=2000,
                       alpha=1e-4,
                       gamma=0.99,
                       epsilon_start=1.0,
                       epsilon_decay=0.998,
                       epsilon_min=0.0,
                       replay_memory_size=2**16,
                       replay_sample_size=32,
                       training_start_memory_size=64,
                       mean_reward_recency=100,
                       model_saving_folder=join('.', "models")):
    dimensions = _get_dimensions(env, hidden_layer_dimensions)
    q_fun = _get_qfun(framework, dimensions, alpha, use_dropout=use_dropout)

    lunar_lander = QLearner(env=env,
                            q_fun=q_fun,
                            epsilon_decay=epsilon_decay,
                            epsilon_min=epsilon_min,
                            gamma=gamma)

    mean_reward, logs = lunar_lander.train(
        episode_count=training_episode_count,
        epsilon_start=epsilon_start,
        replay_memory=replay_memory_size,
        replay_sample_size=replay_sample_size,
        training_start_memory_size=training_start_memory_size,
        mean_reward_recency=mean_reward_recency)

    file_path_no_exname, _ = _get_file_path(
        folder_path=model_saving_folder,
        framework=framework,
        extension_name='.mod',
        hidden_layer_dimensions=hidden_layer_dimensions,
        mean_reward=mean_reward,
        alpha=alpha,
        gamma=gamma,
        epsilon_start=epsilon_start,
        epsilon_decay=epsilon_decay,
        epsilon_min=epsilon_min,
        replay_memory_size=replay_memory_size,
        replay_sample_size=replay_sample_size,
        use_dropout=use_dropout,
        timestamp=int(time()))

    q_fun.model.save(file_path_no_exname + '.mod')
    logs.to_csv(file_path_no_exname + '.csv', index=False)
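
A usage sketch for the trainer above; it assumes the environment comes from OpenAI Gym (both gym and the "LunarLander-v2" id are assumptions — any env with the same observation/action interface would do).

import gym

env = gym.make("LunarLander-v2")
train_lunar_lander(env,
                   framework="pytorch",
                   hidden_layer_dimensions=[128, 64],
                   training_episode_count=2000)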
Example #5
 def __init__(self, max_position=100, starting_cash=100000):
     self.indutil = IndUtil()
     self.epochMax = 200
     self.epochMin = 20
     self.converged = False
     self.learner = QLearner(
         num_states=self.indutil.get_state_size(),
         num_actions=3,
         alpha=0.2,
         gamma=0.9,
         rar=0.5,
         radr=0.99,
         verbose=False
     )
     self.convergedata = collections.deque(maxlen=10)
     self.numshares = max_position
     self.starting_cash = float(starting_cash)
Example #6
 def __init__(self, learners=None):
     self.cards = [i for i in range(52)]
     # fold, check, call, or raise any tenth of the pot, up to 10x the pot
     num_actions = 22
     #  pre-flop, flop, turn, river
     if learners is None:
         self.learners = [
             QLearner(num_actions=num_actions),
             QLearner(num_actions=num_actions),
             QLearner(num_actions=num_actions),
             QLearner(num_actions=num_actions)
         ]
     else:
         self.learners = learners
     self.state = hand_state()
     self.num_players = 0
     self.big_blind = -1
     self.choices = [[], [], [], []]
     self.pot = 0
     self.invested = 0
Example #7
def Qlearning_Ltest():
    ''' Testing function for Q learning on L-track'''
    q = QLearner(0.5, 0.9, 0.9, "L")
    #q.track.show()
    q.train((32, 2, 0, 1))
    q.train((32, 3, 0, 1))
    q.train()
    q.train()
    for i in range(10):
        print(q.trial_run())
Example #8
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        stock_data = StockData(symbol, sd, ed)
        trading_state_factory = TradingStateFactory(
            stock_data, self._indicator_discretizer)

        self._learner = QLearner(num_states=trading_state_factory.num_states,
                                 num_actions=3,
                                 alpha=0.2,
                                 gamma=0.9,
                                 rar=0.5,
                                 radr=0.99,
                                 dyna=0)

        self._trading_environment.qlearner = self._learner
        self._trading_environment.trading_state_factory = trading_state_factory
        self._trading_environment.stock_data = stock_data
        self._trading_environment.trading_options = {
            'trading_dates': stock_data.trading_dates,
            'impact': self.impact
        }

        latest_cumulative_return = -999
        current_cumulative_return = 0
        episodes = 0

        # Run learning episodes until the cumulative return of the strategy has converged
        while np.abs(latest_cumulative_return -
                     current_cumulative_return) > 0.001:
            latest_cumulative_return = current_cumulative_return

            trades = self._trading_environment.run_learning_episode()
            orders = self._convert_trades_to_marketisim_orders(symbol, trades)

            portfolio_values = compute_portvals(
                orders,
                start_val=sv,
                commission=0.,
                impact=self.impact,
                prices=stock_data.price.copy(),
            )

            current_cumulative_return = self._compute_cumulative_return(
                portfolio_values)

            episodes += 1

        # Keep track of the number of training episodes
        self._metadata['training_episodes'] = episodes
Example #9
def crash_test():
    ''' Testing function for crash detection in simulator '''
    q = QLearner(0.5, 0.9, "O")
    start = (3, 21)
    end = (7, 21)
    c = q.track.check_for_crash(start, end)
    if c:
        print("Crashed!")
        q.agent.set_state(c[0], c[1], 0, 0)
        q.track.show()
    else:
        print("Safe!")
        q.agent.set_state(end[0], end[1], 0, 0)
        q.track.show()
Example #10
 def __init__(self, verbose=False, impact=0.0):
     self.verbose = verbose
     self.impact = impact
     self.symbol = None
     self.states = None
     num_states = 500
     num_actions = 3
     alpha = 0.5  # 0.2
     gamma = 0.75  # 0.8
     rar = 0.65  # 0.5
     radr = 0.99
     dyna = 0
     verbose = False
     qlearner = QLearner(num_states, num_actions, alpha, gamma, rar, radr,
                         dyna, verbose)
     self.learner = qlearner
Example #11
        plt.figure()
        plot_df.plot(x='Iterations', y='Mean-V', title=title)
        plt.show()


if __name__ == '__main__':
    fl = FrozenLake(size=20)
    vi, vi_policy, it, t = fl.value_iteration()
    pi, pi_policy, _, _ = fl.policy_iteration(discount=0.9)

    print('Eval : Value Iteration Policy')
    fl.eval_policy(vi_policy, iterations=1000)
    print('Eval : Policy Iteration Policy')
    fl.eval_policy(pi_policy, iterations=1000)

    ql = QLearner(fl.name, fl.prob, fl.rewards)
    ql.q_learning_trials(trials=20, vi=vi, pi=pi)
    run_stats, ql_policy = ql.q_learning(gamma=0.999,
                                         alpha=0.45,
                                         alpha_decay=0.999907088,
                                         alpha_min=0.082682664,
                                         epsilon=0.968587999,
                                         epsilon_min=0.148113184,
                                         epsilon_decay=0.996218224,
                                         n_iter=60000,
                                         returnStats=True)
    fl.plotQlearn(run_stats, 'Frozen Lake - Q-Learning')
    # fl.eval_policy(ql_policy,iterations=1000)

    sys.exit()
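
The run above passes explicit decay schedules for alpha and epsilon. A plausible per-iteration update consistent with those parameter names (an assumption about q_learning's internals, not taken from its source) is:

alpha, epsilon = 0.45, 0.968587999
for it in range(60000):
    # ... epsilon-greedy action selection and Q-update happen here ...
    alpha = max(0.082682664, alpha * 0.999907088)      # alpha_min, alpha_decay
    epsilon = max(0.148113184, epsilon * 0.996218224)  # epsilon_min, epsilon_decay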
Example #12
from QLearner import QLearner
from QCharacter import QCharacter

from features1 import *

features = [
    distanceToExit, distanceToBomb, distanceToMonster, inBombExplosionRange,
    anyDroppedBombs
]
weights = [
    173.17464200348178, -1.8397384409939697, -28.549911042490006,
    -22.392729217308883, 0.050295801861828845
]

qlearner = QLearner(weights, features)
# Create the game
# TODO Change this if you want different random choices
g = Game.fromfile('map.txt')

g.add_monster(StupidMonster(
    "stupid",  # name
    "S",  # avatar
    3,
    9  # position
))

# TODO Add your character
q_character = QCharacter(
    "me",  # name
    "C",  # avatar
Example #13
    return p1_stats


if __name__ == "__main__":

    # Example Usage
    # battle(Board(show_board=True, show_result=True), RandomPlayer(), RandomPlayer(), 1, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), RandomPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), SmartPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), PerfectPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), SmartPlayer(), PerfectPlayer(), 100, learn=False, show_result=True)

    # ========================================================================
    # ** Initialization **
    # ========================================================================
    qlearner = QLearner()
    NUM = qlearner.GAME_NUM

    # ========================================================================
    # ** TRAIN: play 2*NUM games against players who only make random moves **
    # ========================================================================
    board = Board()
    battle(board, RandomPlayer(), qlearner, NUM, learn=True, show_result=False)
    battle(board, qlearner, RandomPlayer(), NUM, learn=True, show_result=False)

    # ========================================================================
    # ** TEST: play 1000 games against each opponent
    # ========================================================================
    q_rand = battle(board, qlearner, RandomPlayer(), 500)
    rand_q = battle(board, RandomPlayer(), qlearner, 500)
    q_smart = battle(board, qlearner, SmartPlayer(), 500)
Example #14
class Agent:

    def __init__(self, max_position=100, starting_cash=100000):
        self.indutil = IndUtil()
        self.epochMax = 200
        self.epochMin = 20
        self.converged = False
        self.learner = QLearner(
            num_states=self.indutil.get_state_size(),
            num_actions=3,
            alpha=0.2,
            gamma=0.9,
            rar=0.5,
            radr=0.99,
            verbose=False
        )
        self.convergedata = collections.deque(maxlen=10)
        self.numshares = max_position
        self.starting_cash = float(starting_cash)

    """
    This will determine if the algorithm has converged (internal use only)
    It will look for less than a 1% change in the average of the last 10 portfolio values
    """
    def has_converged(self, reward, epochs):
        self.convergedata.append(reward)
        if self.epochMin > epochs:
            return False
        elif abs(100*(np.average(self.convergedata)-reward)/reward) < 1:
            return True
        elif epochs >= self.epochMax:
            return True
        else:
            return False

    """
    This method is an internal method used by training to keep track of action effects
    """
    def take_action(self, df, symbol, index, action, cash, position, last_port_value):
        # 0 for hold, 1 for buy, 2 for sell
        price = df.loc[index][symbol]

        if action == 1 and position < self.numshares and cash >= self.numshares * price:
            position += self.numshares
            cash -= self.numshares * price
        elif action == 2 and position > 0:
            cash += position * price
            position = 0

        reward = ((cash + position*price)/last_port_value)-1
        port_value = cash + position*price
        return cash, position, reward, port_value

    """
    This will train the agents q-learner
    """
    def train(self, df, symbol):
        states = self.indutil.compute_indicators(df, symbol)
        epoch = 0
        last_epoch_value = self.starting_cash

        while not self.has_converged(last_epoch_value, epoch):
            position = 0
            cash = self.starting_cash
            last_port_value = cash

            action = self.learner.init_state(states.iloc[0].values[0])
            cash, position, reward, port_value \
                = self.take_action(df, symbol, states.index[0], action, cash, position, last_port_value)
            for index, state in states[1:].iterrows():
                action = self.learner.step(state.values[0], reward)
                last_port_value = port_value
                cash, position, reward, port_value \
                    = self.take_action(df, symbol, index, action, cash, position, last_port_value)

            price = df.iloc[-1][symbol]
            last_epoch_value = position * price + cash
            epoch += 1

    """
    This method will be called to test the learned policy.
    """
    def test(self, df, symbol):
        states = self.indutil.compute_indicators(df, symbol)
        trades = df[[symbol, ]]
        trades.values[:, :] = 0
        position = 0
        cash = self.starting_cash

        for index, state in states.iterrows():
            action = self.learner.query(state.values[0])
            price = df.loc[index][symbol]
            if action == 1 and cash >= price*self.numshares and position < self.numshares:
                trades.loc[index][symbol] = self.numshares
                position = self.numshares
                cash -= price*self.numshares
            elif action == 2 and position > 0:
                trades.loc[index][symbol] = -position
                cash += price * position
                position = 0

        return trades
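
A minimal usage sketch for the Agent above, assuming df is a DataFrame of adjusted-close prices indexed by date with one column per symbol ("JPM" here is only an illustration):

agent = Agent(max_position=100, starting_cash=100000)
agent.train(df, "JPM")           # runs epochs until has_converged() is satisfied
trades = agent.test(df, "JPM")   # returns a DataFrame of share deltas per trading day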
Example #15
class StrategyLearner(object):

    # constructor
    def __init__(self, verbose=False, impact=0.0):
        self.verbose = verbose
        self.impact = impact

        self.learner = QLearner(num_states=1000, \
        num_actions = 3, \
        alpha = 0.2, \
        gamma = 0.9, \
        rar = 0.5, \
        radr = 0.99, \
        dyna = 0, \
        verbose = False)

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "JPM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,12,31), \
        sv = 100000):

        dates = pd.date_range(sd, ed)
        prices = ut.get_data([symbol], dates)[[symbol]]
        prices['Cash'] = 1.0
        high = ut.get_data([symbol], dates, colname='High')[[symbol]]
        low = ut.get_data([symbol], dates, colname='Low')[[symbol]]
        orig_close = ut.get_data([symbol], dates, colname='Close')[[symbol]]
        adj_high = high * prices[[symbol]] / orig_close
        adj_low = low * prices[[symbol]] / orig_close

        orders = pd.DataFrame().reindex_like(prices)
        orders = orders.rename(index=str,
                               columns={
                                   'Cash': 'Order',
                                   symbol: 'Shares'
                               })
        orders['Shares'] = 0
        orders['Order'] = 'CASH'
        orders.index.name = 'Date'
        orders.index = pd.to_datetime(orders.index, format="%Y/%m/%d")

        positions = pd.DataFrame().reindex_like(prices)
        positions.fillna(0, inplace=True)
        positions.iloc[0, -1] = sv
        action = self.learner.querysetstate(0)

        sma_range = indicators.sma(prices[[symbol]], 10).iloc[:, 0]
        sma_bins = pd.qcut(sma_range, 10, labels=False)
        bb_range = indicators.bb(prices[[symbol]])
        bb_range['value'] = bb_range.High - bb_range.Low
        bb_range.fillna(method='bfill', inplace=True)
        bb_range = bb_range['value']
        bb_bins = pd.qcut(bb_range, 10, labels=False)
        comparisons = [
            adj_high[symbol] - adj_low[symbol],
            abs(adj_low[symbol] - prices[symbol].shift(1)),
            abs(adj_high[symbol] - prices[symbol].shift(1))
        ]
        tr = pd.concat(comparisons, axis=1).max(axis=1)
        tr.fillna(method='bfill', inplace=True)
        atr_range = tr.rolling(14, min_periods=1).mean()
        atr_bins = pd.qcut(atr_range, 10, labels=False)
        # states = sma_bins * 100 + bb_bins * 10 + atr_bins  # superseded by the next line
        states = atr_bins * 100 + bb_bins * 10 + sma_bins
        #states = pd.qcut(sma_range, 500, labels=False) # for experiment 1 to compare with manual strategy

        pre_shares = 0
        normalized_close = prices[symbol] / prices.iloc[0, 0]
        daily_return = normalized_close - normalized_close.shift(1)
        daily_return.fillna(method='bfill', inplace=True)
        pre_orders = orders.copy()
        pre_orders.iloc[0, 0] = 1000

        while not orders.equals(pre_orders):  # check if converges
            pre_cash = sv
            pre_holdings = 0
            pre_orders = orders.copy()
            for date, row in orders.iterrows():
                cur_state = states[date]
                reward = daily_return[date] * pre_holdings * (1 - self.impact)
                action = self.learner.query(cur_state, reward)
                orders.loc[date, 'Order'] = action
                if action == 0:
                    orders.loc[date, 'Shares'] = -1000 - pre_shares
                    positions.loc[date, symbol] = -1000
                if action == 1:
                    orders.loc[date, 'Shares'] = 0 - pre_shares
                    positions.loc[date, symbol] = 0
                if action == 2:
                    orders.loc[date, 'Shares'] = 1000 - pre_shares
                    positions.loc[date, symbol] = 1000

                positions.loc[date, 'Cash'] = pre_cash - orders.loc[
                    date, 'Shares'] * prices.loc[date, symbol]
                pre_cash = positions.loc[date, 'Cash']
                pre_holdings = positions.loc[date, symbol]
            #cur_return = ((positions * prices).sum(axis=1).iloc[-1] - sv )/sv

        # add your code to do learning here

        # example usage of the old backward compatible util function
        # syms=[symbol]
        # dates = pd.date_range(sd, ed)
        # prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        # prices = prices_all[syms]  # only portfolio symbols
        # prices_SPY = prices_all['SPY']  # only SPY, for comparison later
        # if self.verbose: print prices

        # example use with new colname
        # volume_all = ut.get_data(syms, dates, colname = "Volume")  # automatically adds SPY
        # volume = volume_all[syms]  # only portfolio symbols
        # volume_SPY = volume_all['SPY']  # only SPY, for comparison later
        # if self.verbose: print volume

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol = "JPM", \
        sd=dt.datetime(2010,1,1), \
        ed=dt.datetime(2011,12,31), \
        sv = 100000):

        dates = pd.date_range(sd, ed)
        prices = ut.get_data([symbol], dates)[[symbol]]
        high = ut.get_data([symbol], dates, colname='High')[[symbol]]
        low = ut.get_data([symbol], dates, colname='Low')[[symbol]]
        orig_close = ut.get_data([symbol], dates, colname='Close')[[symbol]]
        adj_high = high * prices[[symbol]] / orig_close
        adj_low = low * prices[[symbol]] / orig_close

        sma_range = indicators.sma(prices[[symbol]], 10).iloc[:, 0]
        sma_bins = pd.qcut(sma_range, 10, labels=False)
        bb_range = indicators.bb(prices[[symbol]])
        bb_range['value'] = bb_range.High - bb_range.Low
        bb_range.fillna(method='bfill', inplace=True)
        bb_range = bb_range['value']
        bb_bins = pd.qcut(bb_range, 10, labels=False)
        comparisons = [
            adj_high[symbol] - adj_low[symbol],
            abs(adj_low[symbol] - prices[symbol].shift(1)),
            abs(adj_high[symbol] - prices[symbol].shift(1))
        ]
        tr = pd.concat(comparisons, axis=1).max(axis=1)
        tr.fillna(method='bfill', inplace=True)
        atr_range = tr.rolling(14, min_periods=1).mean()
        atr_bins = pd.qcut(atr_range, 10, labels=False)
        # states = sma_bins * 100 + bb_bins * 10 + atr_bins  # superseded by the next line
        states = atr_bins * 100 + bb_bins * 10 + sma_bins
        #states = pd.qcut(sma_range, 1000, labels=False) # for experiment 1 to compare with manual strategy
        trades = prices.copy()
        pre_position = 0
        for date, row in trades.iterrows():
            cur_state = states[date]  # compute current state
            action = self.learner.querysetstate(cur_state)
            if action == 0:
                trades.loc[date, symbol] = -1000 - pre_position
                pre_position = -1000
            elif action == 1:
                trades.loc[date, symbol] = 0 - pre_position
                pre_position = 0
            elif action == 2:
                trades.loc[date, symbol] = 1000 - pre_position
                pre_position = 1000
        # here we build a fake set of trades
        # your code should return the same sort of data
        # dates = pd.date_range(sd, ed)
        # prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
        # trades = prices_all[[symbol,]]  # only portfolio symbols
        # trades_SPY = prices_all['SPY']  # only SPY, for comparison later
        # trades.values[:,:] = 0 # set them all to nothing
        # trades.values[0,:] = 1000 # add a BUY at the start
        # trades.values[40,:] = -1000 # add a SELL
        # trades.values[41,:] = 1000 # add a BUY
        # trades.values[60,:] = -2000 # go short from long
        # trades.values[61,:] = 2000 # go long from short
        # trades.values[-1,:] = -1000 #exit on the last day
        if self.verbose: print(type(trades))  # it better be a DataFrame!
        if self.verbose: print(trades)
        if self.verbose: print(prices)
        return trades

    def author(self):
        return 'hwang404'
Example #16
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # add your code to do learning here
        # get data
        data = self.getData(sd, ed, symbol)

        # get indicator data
        indicator_m = MomentumIndicator(data.iloc[:, 0],
                                        n=8).calculate_helper_data()
        indicator_bb = BollingerBandIndicator(data.iloc[:, 0], 20,
                                              2).calculate_helper_data()
        indicator_s = StochasticIndicator(data, 14).calculate_helper_data()

        # Get bins
        self.bin_momentum = pd.qcut(indicator_m.iloc[:, 1],
                                    4,
                                    retbins=True,
                                    duplicates="drop")[1]
        self.bin_bollinger = pd.qcut(indicator_bb.iloc[:, 1] -
                                     indicator_bb.iloc[:, 3],
                                     4,
                                     retbins=True,
                                     duplicates="drop")[1]
        self.bin_stochastic = pd.qcut(indicator_s.iloc[:, 1],
                                      4,
                                      retbins=True,
                                      duplicates="drop")[1]
        self.bin_price = pd.qcut(data.iloc[:, 0].diff(periods=1),
                                 3,
                                 retbins=True,
                                 duplicates="drop")[1]
        price_diff = data.iloc[:, 0].diff()
        momentums = np.digitize(indicator_m.iloc[:, 1], self.bin_momentum)
        bb_diffs = np.digitize(
            indicator_bb.iloc[:, 1] - indicator_bb.iloc[:, 3],
            self.bin_bollinger)
        stochastics = np.digitize(indicator_s.iloc[:, 1], self.bin_stochastic)
        price_features = np.digitize(price_diff, self.bin_price)

        # Set the first 20 inputs to -1, since the algorithm needs 20 days of data for the Bollinger Band rolling window.
        momentums[:20] = -1
        bb_diffs[:20] = -1
        stochastics[:20] = -1
        price_features[:20] = -1
        # initialize learner
        num_states = self.getTotalNumberOfStates()
        self.learner = QLearner(num_states=num_states, \
                            num_actions = 3, \
                            alpha = 0.05, \
                            gamma = 0.9, \
                            rar = 0.99, \
                            radr = 0.999, \
                            dyna = 0, \
                            verbose = False)

        # loop day by day
        # create variables for loop
        date_list = data.index
        iter = 0
        max_iter = 25
        while iter <= max_iter:
            portfolio = pd.DataFrame(np.zeros((len(date_list), 1)),
                                     index=date_list)
            stock_shares = 0
            balance = sv
            total_value = sv
            commission = 0
            holdings = 0  # -1 for short, 0 for cash, 1 for long
            current_num_stocks = 0
            order = 0
            for i, day in enumerate(date_list):

                current_strtime = day.strftime('%Y-%m-%d')
                # TODO: update state and reward accordingly
                if i == 0:
                    price_feature = 0
                else:
                    price_feature = price_features[i]

                holdings = current_num_stocks // 1000
                #state = self.convertFeaturesToState(price_feature, indicator_m.loc[current_strtime], indicator_bb.loc[current_strtime], indicator_s.loc[current_strtime], holdings)
                state = self.convertFeaturesToState(price_feature,
                                                    momentums[i], bb_diffs[i],
                                                    stochastics[i], holdings)
                reward = self.calculateReward(total_value, order)
                # Get action
                action = self.learner.query(state, reward) - 1
                order = 0
                if holdings != action:
                    # long
                    if action == 1:
                        order = 1000 - current_num_stocks
                        current_num_stocks = 1000
                    # short
                    elif action == -1:
                        order = -1000 - current_num_stocks
                        current_num_stocks = -1000
                    # do nothing
                    elif action == 0:
                        order = 0 - current_num_stocks
                        current_num_stocks = 0
                # If there is an order
                if order != 0:

                    # get order details
                    order_num_of_shares = order
                    current_symbol_price = data.loc[current_strtime].iloc[0]

                    balance += -1 * order_num_of_shares * current_symbol_price
                    stock_shares += order_num_of_shares
                    # Minus commission
                    if order_num_of_shares != 0:
                        balance -= commission
                        # Minus impact
                        balance -= abs(order_num_of_shares
                                       ) * current_symbol_price * self.impact

                # Update portfolio with balance and current stock worth
                stock_value = data.loc[current_strtime].iloc[0] * stock_shares
                total_value = balance + stock_value

                portfolio.loc[current_strtime] = total_value

            # current_ret = (portfolio.iloc[-1] / portfolio.iloc[0]).iloc[0]
            iter += 1
Example #17
class StrategyLearner(object):

    # constructor
    def __init__(self, verbose=False, impact=0.0):
        self.verbose = verbose
        self.impact = impact

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # add your code to do learning here
        # get data
        data = self.getData(sd, ed, symbol)

        # get indicator data
        indicator_m = MomentumIndicator(data.iloc[:, 0],
                                        n=8).calculate_helper_data()
        indicator_bb = BollingerBandIndicator(data.iloc[:, 0], 20,
                                              2).calculate_helper_data()
        indicator_s = StochasticIndicator(data, 14).calculate_helper_data()

        # Get bins
        self.bin_momentum = pd.qcut(indicator_m.iloc[:, 1],
                                    4,
                                    retbins=True,
                                    duplicates="drop")[1]
        self.bin_bollinger = pd.qcut(indicator_bb.iloc[:, 1] -
                                     indicator_bb.iloc[:, 3],
                                     4,
                                     retbins=True,
                                     duplicates="drop")[1]
        self.bin_stochastic = pd.qcut(indicator_s.iloc[:, 1],
                                      4,
                                      retbins=True,
                                      duplicates="drop")[1]
        self.bin_price = pd.qcut(data.iloc[:, 0].diff(periods=1),
                                 3,
                                 retbins=True,
                                 duplicates="drop")[1]
        price_diff = data.iloc[:, 0].diff()
        momentums = np.digitize(indicator_m.iloc[:, 1], self.bin_momentum)
        bb_diffs = np.digitize(
            indicator_bb.iloc[:, 1] - indicator_bb.iloc[:, 3],
            self.bin_bollinger)
        stochastics = np.digitize(indicator_s.iloc[:, 1], self.bin_stochastic)
        price_features = np.digitize(price_diff, self.bin_price)

        # Set the first 20 inputs to -1, since the algorithm needs 20 days of data for the Bollinger Band rolling window.
        momentums[:20] = -1
        bb_diffs[:20] = -1
        stochastics[:20] = -1
        price_features[:20] = -1
        # initialize learner
        num_states = self.getTotalNumberOfStates()
        self.learner = QLearner(num_states=num_states, \
                            num_actions = 3, \
                            alpha = 0.05, \
                            gamma = 0.9, \
                            rar = 0.99, \
                            radr = 0.999, \
                            dyna = 0, \
                            verbose = False)

        # loop day by day
        # create variables for loop
        date_list = data.index
        iter = 0
        max_iter = 25
        while iter <= max_iter:
            portfolio = pd.DataFrame(np.zeros((len(date_list), 1)),
                                     index=date_list)
            stock_shares = 0
            balance = sv
            total_value = sv
            commission = 0
            holdings = 0  # -1 for short, 0 for cash, 1 for long
            current_num_stocks = 0
            order = 0
            for i, day in enumerate(date_list):

                current_strtime = day.strftime('%Y-%m-%d')
                # TODO: update state and reward accordingly
                if i == 0:
                    price_feature = 0
                else:
                    price_feature = price_features[i]

                holdings = current_num_stocks // 1000
                #state = self.convertFeaturesToState(price_feature, indicator_m.loc[current_strtime], indicator_bb.loc[current_strtime], indicator_s.loc[current_strtime], holdings)
                state = self.convertFeaturesToState(price_feature,
                                                    momentums[i], bb_diffs[i],
                                                    stochastics[i], holdings)
                reward = self.calculateReward(total_value, order)
                # Get action
                action = self.learner.query(state, reward) - 1
                order = 0
                if holdings != action:
                    # long
                    if action == 1:
                        order = 1000 - current_num_stocks
                        current_num_stocks = 1000
                    # short
                    elif action == -1:
                        order = -1000 - current_num_stocks
                        current_num_stocks = -1000
                    # do nothing
                    elif action == 0:
                        order = 0 - current_num_stocks
                        current_num_stocks = 0
                # If there is an order
                if order != 0:

                    # get order details
                    order_num_of_shares = order
                    current_symbol_price = data.loc[current_strtime].iloc[0]

                    balance += -1 * order_num_of_shares * current_symbol_price
                    stock_shares += order_num_of_shares
                    # Minus commission
                    if order_num_of_shares != 0:
                        balance -= commission
                        # Minus impact
                        balance -= abs(order_num_of_shares
                                       ) * current_symbol_price * self.impact

                # Update portfolio with balance and current stock worth
                stock_value = data.loc[current_strtime].iloc[0] * stock_shares
                total_value = balance + stock_value

                portfolio.loc[current_strtime] = total_value

            # current_ret = (portfolio.iloc[-1] / portfolio.iloc[0]).iloc[0]
            iter += 1

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol = "IBM", \
        sd=dt.datetime(2009,1,1), \
        ed=dt.datetime(2010,1,1), \
        sv = 10000):

        data = self.getData(sd, ed, symbol)

        # get indicator data
        indicator_m = MomentumIndicator(data.iloc[:, 0],
                                        n=8).calculate_helper_data()
        indicator_bb = BollingerBandIndicator(data.iloc[:, 0], 20,
                                              2).calculate_helper_data()
        indicator_s = StochasticIndicator(data, 14).calculate_helper_data()

        momentums = np.digitize(indicator_m.iloc[:, 1], self.bin_momentum)
        bb_diffs = np.digitize(
            indicator_bb.iloc[:, 1] - indicator_bb.iloc[:, 3],
            self.bin_bollinger)
        stochastics = np.digitize(indicator_s.iloc[:, 1], self.bin_stochastic)
        price_features = np.digitize(data.iloc[:, 0].diff(), self.bin_price)

        # Set the first 20 inputs to -1, since the algorithm needs 20 days of data for the Bollinger Band rolling window.
        momentums[:20] = -1
        bb_diffs[:20] = -1
        stochastics[:20] = -1
        price_features[:20] = -1

        trade_df = pd.DataFrame(index=data.index, columns=[symbol])
        current_holdings = 0
        action = 0
        for i in range(data.shape[0]):
            if i == 0:
                price_feature = 0
            else:
                price_feature = price_features[i]

            #state = self.convertFeaturesToState(price_feature, indicator_m.iloc[i],indicator_bb.iloc[i],indicator_s.iloc[i],current_holdings // 1000)
            state = self.convertFeaturesToState(price_feature, momentums[i],
                                                bb_diffs[i], stochastics[i],
                                                current_holdings // 1000)
            action = self.learner.querysetstate(state) - 1
            # buy
            if action == 1:
                trade_df.iloc[i] = 1000 - current_holdings
                current_holdings = 1000
            # sell
            elif action == -1:
                trade_df.iloc[i] = -1000 - current_holdings
                current_holdings = -1000
            # do nothing
            elif action == 0:
                trade_df.iloc[i] = 0 - current_holdings
                current_holdings = 0
        # return trade_df
        if self.verbose: print(type(trade_df))  # it better be a DataFrame!
        if self.verbose: print(trade_df)
        #if self.verbose: print(prices_all)
        return trade_df

    def getTotalNumberOfStates(self):
        # + 1 is for the state when some values are not yet calculated
        return 5 * 5 * 5 * 3 * 4 + 1

    # stochastic -> Value from 0-100
    # bollinger -> 3 Values. SMA + 2*STD, SMA and SMA - 2*STD
    # momentum -> value around -0.5 to 0.5
    # holding -> -1, 0 or 1
    # Col 0 is always adjusted close
    # Features starts from col 1
    def convertFeaturesToState(self, price, momentum, bollinger, stochastic,
                               holding):
        if np.isnan(momentum) or np.isnan(bollinger) or np.isnan(
                stochastic
        ) or momentum == -1 or bollinger == -1 or stochastic == -1 or price == -1:
            return self.getTotalNumberOfStates() - 1

        # These values start from 1; subtract 1 to make them start from 0
        momentum -= 1
        bollinger -= 1
        stochastic -= 1
        price -= 1
        # Holding starts from -1; add 1 to make it start from 0
        holding += 1
        return momentum + stochastic * 5 + bollinger * 25 + holding * 125 + price * 375

    def calculateReward(self, total_value, prev_day_order):
        try:
            return_val = (total_value - self.prev_total_value)
            self.prev_total_value = total_value
            return return_val
        except AttributeError:
            self.prev_total_value = total_value

        return 0

    def getData(self, start_date, end_date, symbol):
        date_list = pd.date_range(start_date, end_date)
        d = ut.get_data([symbol], date_list)[symbol]
        close = ut.get_data([symbol], date_list, colname="Close")[symbol]
        high = ut.get_data([symbol], date_list, colname="High")[symbol]
        low = ut.get_data([symbol], date_list, colname="Low")[symbol]
        data = pd.DataFrame(index=d.index,
                            columns=["Adj Close", "Close", "High", "Low"])
        data.iloc[:, 0] = d
        data.iloc[:, 1] = close
        data.iloc[:, 2] = high
        data.iloc[:, 3] = low
        data = data.fillna(method="ffill")
        data = data.fillna(method="bfill")
        return data

    # This method is to fulfill inheritance requirement for strategy
    def getStrategyName(self):
        return "Q Learning Strategy"

    def author(self):
        return "jkok7"
Example #18
        print('_' * 60)
        print()

    return p1_stats


if __name__ == "__main__":

    # Example Usage
    # battle(Board(show_board=True, show_result=True), RandomPlayer(), RandomPlayer(), 1, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), RandomPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), SmartPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), RandomPlayer(), PerfectPlayer(), 100, learn=False, show_result=True)
    # battle(Board(), SmartPlayer(), PerfectPlayer(), 100, learn=False, show_result=True)

    qlearner = QLearner()
    qlearner = QLearnerXO()
    NUM = qlearner.GAME_NUM
    # NUM = 10

    # train: play NUM games against players who only make random moves
    print('Training QLearner against RandomPlayer for {} times......'.format(
        NUM))
    board = Board()
    qlearner.epsilon = 0.5
    qlearner.alpha = 0.5
    qlearner.varyA_E = True

    # battle(board, RandomPlayer(), qlearner, NUM, learn=True, show_result=False)
    # print(qlearner.epsilon, qlearner.alpha)
    # qlearner.epsilon = 0
Example #19
from poker_learner import poker_learner
from QLearner import QLearner
learners = []
for i in range(4):
    f = open("./learners/" + str(i), 'r')
    learners.append(QLearner())
    learners[i].Q = eval(f.read())
    f.close()
p = poker_learner(learners=learners)
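
The loader above evals a text dump of each learner's Q table; the matching save step is not shown, but a sketch that mirrors the eval-based load (an assumption, not part of the original) would write each table as its repr:

# hypothetical save step, mirroring the eval() load above
for i, learner in enumerate(learners):
    with open("./learners/" + str(i), 'w') as f:
        f.write(repr(learner.Q))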
Example #20
def Qlearning_Rtest():
    ''' Testing function for Q learning on R-track'''
    q = QLearner(0.5, 0.9, 0.9, "R", True)
    q.track.show()
    q.train((23, 11, 1, -1))
    q.train((12, 17, 1, -1))
    q.train((13, 17, 1, -1))
    q.train((21, 25, 0, -1))
    q.train((21, 23, -1, -1))
    q.train((5, 24, 1, 0))
    q.train((4, 23, 1, 1))
    q.train()
    q.train()
    for i in range(10):
        print(q.trial_run())
Example #21
class StrategyLearner(object):

    # constructor
    def __init__(self, verbose=False, impact=0.0):
        self.verbose = verbose
        self.impact = impact
        self.learner = QLearner(num_states=90000, num_actions=2, dyna=0)
        self.mom_mean = 0
        self.mom_std = 0
        self.long_mom_mean = 0
        self.long_mom_std = 0
        self.bollinger_bands = None
        self.rsi = None
        self.momentum = None
        self.cr = -100000

    def author(self):
        return 'nlerner3'

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):
        self.cash = sv
        start = sd - dt.timedelta(days=60)
        dates = pd.date_range(start, ed)
        data = ut.get_data([symbol], dates)
        data.fillna(method="ffill", inplace=True)
        data.fillna(method="bfill", inplace=True)
        prices = indicators.normalized_prices(data[symbol])

        self.bollinger_bands = None
        self.momentum = None

        self.rsi = indicators.rolling_rsi(prices, 5)
        self.bollinger_bands = indicators.bollinger_bands(prices, 15)
        self.momentum = indicators.rolling_momentum(prices, 2)

        self.mom_mean = self.momentum.mean()
        self.mom_std = self.momentum.std()

        last_action = 2
        current_state = self.create_state(sd, prices[:sd], last_action)
        action = self.learner.querysetstate(current_state)
        trades = pd.DataFrame(index=prices[sd:].index, columns=[symbol])
        keepgoing = True
        iteration = 0
        while keepgoing:
            for i in trades.index:
                reward = self.get_reward(action, last_action, prices[:i])
                last_action = action
                current_state = self.create_state(i, prices[:i], last_action)
                action = self.learner.query(current_state, reward)
            cr = self.cash / sv - 1
            if cr == self.cr or iteration == 20:
                keepgoing = False
            self.cr = cr
            iteration += 1

    def get_reward(self, action, last_action, prices_to_date):
        delta = prices_to_date.iloc[-1] - prices_to_date.iloc[-2]
        gains = 0
        if action == 0:
            gains = -1000 * delta
        if action == 1:
            gains += 1000 * delta
        if action != last_action:
            gains = gains - (1000 * prices_to_date.iloc[-1] * self.impact)
        self.cash = self.cash + gains
        return gains

    def create_state(self, date, prices_to_date, prev_action):
        rsi = self.rsi_state(date)
        mom = self.momentum_state(date)
        bollinger = self.bollinger_state(date, prices_to_date)
        return int("{}{}{}{}{}".format(bollinger, rsi[0], rsi[1], mom,
                                       prev_action))

    def rsi_state(self, date):
        rsi = self.rsi[:date]
        current_rsi = rsi[-1]
        last_rsi = rsi[-2]
        rsi_0 = int(current_rsi / 10)
        if rsi_0 > 9:
            rsi_0 = 9
        rsi_1 = int(last_rsi / 10)
        if rsi_1 > 9:
            rsi_1 = 9
        return (rsi_0, rsi_1)

    def momentum_state(self, date):
        rolling_momentum = self.momentum[:date][-1]
        zscore = (rolling_momentum - self.mom_mean) / self.mom_std
        state = int(round(zscore * 3 + 5))
        if state < 0:
            state = 0
        if state > 9:
            state = 9
        return state

    def bollinger_state(self, date, prices_to_date):
        rolling_band = self.bollinger_bands[:date]
        current_price = prices_to_date[-1]
        last_price = prices_to_date[-2]
        current_band = rolling_band.iloc[-1]
        last_band = rolling_band.iloc[-2]
        current_price_state = 1
        last_price_state = 1
        if current_price > current_band['UPPER_BAND']:
            current_price_state += 1
        if current_price < current_band['LOWER_BAND']:
            current_price_state -= 1
        if last_price > last_band['UPPER_BAND']:
            last_price_state += 1
        if last_price < last_band['LOWER_BAND']:
            last_price_state -= 1
        # return (current_price_state, last_price_state)
        return current_price_state * 3 + last_price_state

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol = "IBM", \
        sd=dt.datetime(2009,1,1), \
        ed=dt.datetime(2010,1,1), \
        sv = 10000):

        # here we build a fake set of trades
        # your code should return the same sort of data
        start = sd - dt.timedelta(days=60)
        dates = pd.date_range(start, ed)
        data = ut.get_data([symbol], dates)
        data.fillna(method="ffill", inplace=True)
        data.fillna(method="bfill", inplace=True)
        prices = indicators.normalized_prices(data[symbol])
        trades = pd.DataFrame(index=prices[sd:].index, columns=[symbol])

        self.rsi = indicators.rolling_rsi(prices, 5)
        self.bollinger_bands = indicators.bollinger_bands(prices, 15)
        self.momentum = indicators.rolling_momentum(prices, 2)

        action = 2
        holding = 0
        for i in range(len(trades)):
            prices_to_date = prices[:trades.index[i]]
            state = self.create_state(trades.index[i], prices_to_date, action)
            action = self.learner.querysetstate(state)
            if action == 0:
                trades.iloc[i] = -1000 - holding
                holding = -1000
            if action == 1:
                trades.iloc[i] = 1000 - holding
                holding = 1000
            if action == 2:
                trades.iloc[i] = 0
        return trades
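
Why the learner above is built with num_states=90000: create_state concatenates five digits (bollinger 0-8, two RSI digits 0-9, momentum 0-9, previous action 0-2), so the largest state it can emit is just under that bound.

max_state = int("{}{}{}{}{}".format(8, 9, 9, 9, 2))   # = 89992 < 90000
assert max_state == 89992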
Example #22
class StrategyLearner(object):

    # constructor
    def __init__(self, verbose=False, impact=0.0, dyna=200, epochs=3):
        self.verbose = verbose
        self.impact = impact
        self._z_params = dict()
        self._state_dict = dict()
        self._learner = None
        self._state_order = []
        self._actions = [-1, 0, 1]  # long 1, cash 0, short -1
        self._dyna = dyna
        self._epochs = epochs

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol="IBM",
                    sd=dt.datetime(2008, 1, 1),
                    ed=dt.datetime(2009, 1, 1),
                    sv=10000, n=1):
        # add your code to do learning here

        # Get price info
        syms = [symbol]
        adj_sd = sd - timedelta(days=30)
        dates = pd.date_range(adj_sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols

        # Calculate indicators
        df = add_all_indicators(prices, syms[0], add_helper_data=False)

        # Filter to time range
        df = df.loc[sd:ed, :].copy()

        # Get state df
        state_df = self.fit_transform_state(df, symbol, n_days=n)

        # Initialize Q-learner
        self._learner = QLearner(num_states=len(self._state_dict), num_actions=len(self._actions), dyna=self._dyna)

        # Fit learner
        for _ in range(self._epochs):
            self.fit_learner(state_df)
        return

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol="IBM",
                   sd=dt.datetime(2009, 1, 1),
                   ed=dt.datetime(2010, 1, 1),
                   sv=10000):

        # Get price info
        syms = [symbol]
        adj_sd = sd - timedelta(days=30)
        dates = pd.date_range(adj_sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols

        # Calculate indicators
        df = add_all_indicators(prices, syms[0], add_helper_data=False)

        # Filter to time range
        df = df.loc[sd:ed, :].copy()

        # Get state df
        state_df = self.transform_state(df)

        trades_df = self.test_learner(state_df, symbol)

        return trades_df

    def fit_learner(self, state_df):
        # Single iteration through training data
        # Mix the order up so dyna doesn't overfit to early experiences
        # return nothing
        i = 0
        state = list(state_df.iloc[i].loc[self._state_order])
        position = self._actions[1]  # Action cash is position 0 at index 1
        state.append(position)
        state_num = self._state_dict[tuple(state)]
        action = self._learner.querysetstate(state_num)

        for i in range(1, state_df.shape[0]):
            orig_position = position
            # Calculate reward
            position = self._actions[action]
            reward = state_df.iloc[i]["reward"] * position

            # Adjust reward for impact
            # Depending on original position...
            # If the position changes, reduce reward by impact
            if orig_position != position:
                reward = reward - (abs(reward) * self.impact)

            # Reward is the n-day return scaled by the position held:
            #   long  (+1): reward = +return
            #   cash   (0): reward = 0
            #   short (-1): reward = -return

            # Update state
            state = list(state_df.loc[:, self._state_order].iloc[i])
            state.append(position)
            state_num = self._state_dict[tuple(state)]
            # Query with new state and reward for last action
            action = self._learner.query(state_num, reward)

        return

    def test_learner(self, state_df, symbol):

        # Test Q-Learner in sample for cumulative return (create trades df)
        actions_list = []

        i = 0
        state = list(state_df.iloc[i].loc[self._state_order])
        position = self._actions[1]  # Action cash is position 0 at index 1
        state.append(position)
        state_num = self._state_dict[tuple(state)]
        action = self._learner.querysetstate(state_num)
        actions_list.append(self._actions[action])

        for i in range(state_df.shape[0]):
            position = self._actions[action]
            # Update state
            state = list(state_df.loc[:, self._state_order].iloc[i])
            state.append(position)
            state_num = self._state_dict[tuple(state)]
            action = self._learner.querysetstate(state_num)
            actions_list.append(self._actions[action])

        trades_df = pd.DataFrame(index=state_df.index, data=actions_list[:-1], columns=["action"])

        trades_df[symbol] = 0

        current_position = 0

        for i in trades_df.index:

            action = trades_df.loc[i, "action"]
            if action == current_position:
                trades_df.loc[i, symbol] = 0
            elif action == -1 and current_position == 1:
                trades_df.loc[i, symbol] = -2000
            elif action == 1 and current_position == -1:
                trades_df.loc[i, symbol] = 2000
            elif action == -1 and current_position == 0:
                trades_df.loc[i, symbol] = -1000
            elif action == 1 and current_position == 0:
                trades_df.loc[i, symbol] = 1000
            elif action == 0 and current_position == 1:
                trades_df.loc[i, symbol] = -1000
            elif action == 0 and current_position == -1:
                trades_df.loc[i, symbol] = 1000
            else:
                raise ValueError("Impossible")

            current_position = action

        return trades_df.drop("action", axis=1)

    def fit_transform_state(self, df, symbol, n_days):
        # Symbol specifies the price column name
        # Accepts df with price and indicators
        # Digitize indicators
        # Store z parameters for later use
        # Calculate reward
        # Create state dict for mapping digitized state to integer
        # Return state_df

        # Create state DF
        state_df = pd.DataFrame(index=df.index)

        # Add and digitize indicators

        # Calculate target: n-day future return
        state_df["reward"] = df[symbol].iloc[::-1].rolling(window=n_days + 1).apply(lambda x: (x[0] / x[-1]) - 1).iloc[::-1]

        state_values = []

        # How many states are there, and how do I map them to a single integer?
        # unique_boll = range(state_df["bollinger_band"].max() + 1)

        # Add bollinger band
        state_df["bollinger_band"], unique_n = self.digitize_bollinger(df["bollinger_band"])
        unique_boll = list(range(unique_n))
        state_values.append(unique_boll)
        self._state_order.append("bollinger_band")

        # Add divergence
        state_df["divergence"], unique_n = self.digitize_divergence(df["divergence"])
        unique_div = list(range(unique_n))
        state_values.append(unique_div)
        self._state_order.append("divergence")

        # Add momentum
        state_df["momentum"], unique_n = self.digitize_momentum(df["momentum"])
        unique_mom = list(range(unique_n))
        state_values.append(unique_mom)
        self._state_order.append("momentum")

        # Add D
        state_df["D"], unique_n = self.digitize_d(df["D"])
        unique_d = list(range(unique_n))
        state_values.append(unique_d)
        self._state_order.append("D")

        state_values.append(self._actions)

        # Drop NA's, without reward
        state_df = state_df.dropna()

        all_states = list(itertools.product(*state_values))
        self._state_dict = {k: v for v, k in enumerate(all_states)}
        return state_df

    def transform_state(self, df):
        # Accepts df with indicators
        # Digitize indicators using existing params
        # Return state_df

        # Create state DF
        state_df = pd.DataFrame(index=df.index)

        state_df["bollinger_band"], _ = self.digitize_bollinger(df["bollinger_band"])
        state_df["divergence"], _ = self.digitize_divergence(df["divergence"])
        state_df["momentum"], _ = self.digitize_momentum(df["momentum"])
        state_df["D"], _ = self.digitize_d(df["D"])

        return state_df

    @staticmethod
    def digitize_bollinger(indicator):

        bins = [-1.03, 0, 1.03]

        return np.digitize(indicator, bins), len(bins) + 1

    @staticmethod
    def digitize_divergence(indicator):

        bins = [-.25, 0, .25]

        return np.digitize(indicator, bins), len(bins) + 1

    @staticmethod
    def digitize_momentum(indicator):

        bins = [-.27, .27]

        return np.digitize(indicator, bins), len(bins) + 1

    @staticmethod
    def digitize_d(indicator):
        bins = [.2, .8]
        return np.digitize(indicator, bins), len(bins) + 1
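
    # Sanity-check sketch (not part of the original learner): np.digitize with three bin
    # edges yields four buckets (0-3) and with two edges three buckets (0-2), so the
    # digitized indicators above span 4 * 4 * 3 * 3 = 144 combinations before the action
    # dimension is appended.
    @staticmethod
    def _example_bucket_counts():
        sample = np.array([-2.0, -0.5, 0.5, 2.0])
        boll = np.digitize(sample, [-1.03, 0, 1.03])  # -> array([0, 1, 2, 3])
        mom = np.digitize(sample, [-.27, .27])        # -> array([0, 0, 2, 2])
        return boll, mom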

    def author(self):
        return 'cfarr31'
Example #23
0
def test_lunar_lander(env,
                      framework="pytorch",
                      repeat=10,
                      hidden_layer_dimensions: List[int] = [128, 64],
                      test_episode_count=100,
                      alpha=1e-4,
                      gamma=0.99,
                      epsilon_start=1.0,
                      epsilon_decay=0.998,
                      epsilon_min=0.0,
                      replay_memory_size=2**16,
                      replay_sample_size=32,
                      model_saving_folder=join('.', "models"),
                      model_file=None,
                      render=False):
    if model_file:
        files = [join(model_saving_folder, model_file)]
    else:
        _, files = _get_file_path(
            folder_path=model_saving_folder,
            framework=framework,
            extension_name='.mod',
            hidden_layer_dimensions=hidden_layer_dimensions,
            mean_reward=None,
            alpha=alpha,
            gamma=gamma,
            epsilon_start=epsilon_start,
            epsilon_decay=epsilon_decay,
            epsilon_min=epsilon_min,
            replay_memory_size=replay_memory_size,
            replay_sample_size=replay_sample_size,
            use_dropout=False,
            timestamp=None)
    if files:
        dimensions = _get_dimensions(env, hidden_layer_dimensions)
        logs = True
        for file in files:
            mean_rewards = [None] * repeat
            start_episode_idx = 0
            print("==========test model '{}'==========".format(basename(file)))
            for repeat_idx in range(repeat):
                q_fun = _get_qfun(framework, dimensions, alpha, file)

                lunar_lander = QLearner(env=env,
                                        q_fun=q_fun,
                                        epsilon_decay=epsilon_decay,
                                        epsilon_min=epsilon_min,
                                        gamma=gamma)

                mean_reward, logs = lunar_lander.test(
                    start_episode_idx=start_episode_idx,
                    episode_count=test_episode_count,
                    continued_learning=False,
                    logging=logs,
                    render=render)
                start_episode_idx += test_episode_count
                mean_rewards[repeat_idx] = mean_reward

            logs.to_csv(get_path_without_extension(file) +
                        '_test{}_tmr{}.csv'.format(
                            int(time()), int(sum(mean_rewards) / repeat)),
                        index=False)
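
# Usage sketch (assumptions: gym is installed, a trained ".mod" file exists under ./models,
# and "LunarLander-v2" is the environment the learner above was trained on):
if __name__ == "__main__":
    import gym
    env = gym.make("LunarLander-v2")
    test_lunar_lander(env, framework="pytorch", repeat=3, test_episode_count=50, render=False)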
Example #24
0
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # add your code to do learning here

        # example usage of the old backward compatible util function
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        prices_SPY = prices_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(prices)

        # example use with new colname
        volume_all = ut.get_data(syms, dates,
                                 colname="Volume")  # automatically adds SPY
        volume = volume_all[syms]  # only portfolio symbols
        volume_SPY = volume_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(volume)

        # feature engineering, get your new features for the state space.
        state_bolli, state_bandwidth, state_momentum, state_rsi = self.add_indicators(
            prices, save_data=self.save_data, symb=symbol)

        # compute the discretized state for every trading day in the range.
        states = self.discretize(state_bolli, state_bandwidth, state_momentum,
                                 state_rsi)
        number_of_bins = self.nbins
        self.learner = QLearner(num_states=(number_of_bins ** 4), \
                                num_actions = 3, \
                                alpha = 0.5, \
                                gamma = 0.9, \
                                rar = 0.0, \
                                radr = 0.0, \
                                dyna = 0, \
                                verbose = False)

        converged = False
        count = 0
        converged_yet = 0.

        while (not converged) and (count < 30):

            total_reward = 0
            prices_dataframe, df_trades = self.init_trades_and_prices_df(
                prices)
            holdings = 0

            for i in range(state_bolli.shape[0]):
                current_state = states[i]
                action = self.learner.querysetstate(current_state)
                # reward for the previous action (0 on the first day)
                reward = self.compute_last_reward(prices, holdings, action, i)
                # feed the current state and reward back in to get the next action
                action = self.learner.query(current_state, reward)
                diggity_day = prices.index[i]
                if action == 0:
                    df_trades, holdings = self.compute_trades_and_holdings_sell(
                        df_trades, prices_dataframe, symbol, holdings,
                        diggity_day)
                if action == 1:
                    df_trades, holdings = self.compute_trades_and_holdings_buy(
                        df_trades, prices_dataframe, symbol, holdings,
                        diggity_day)
                total_reward += reward

            cumulative_return = self.compute_cumulative_return(
                prices, prices_dataframe, df_trades, sv)
            count += 1

            if abs(converged_yet - cumulative_return) * 100. < 0.0001:
                converged = True
            else:
                converged_yet = cumulative_return

        return pd.DataFrame(df_trades[symbol])
Example #25
0
class StrategyLearner(object):

    # constructor
    def __init__(self, verbose=False, impact=0.0, save_data=True):
        self.verbose = verbose
        self.impact = impact
        self.nbins = 5
        self.save_data = save_data

    def author(self):
        return "mdunn34"

    def bollinger(self, prices, window_n=20, k=2):
        std = prices.rolling(window=window_n).std()
        mean = prices.rolling(window=window_n).mean()
        upper_bound = mean + (k * std)
        lower_bound = mean - (k * std)
        band = (prices - lower_bound) / (upper_bound - lower_bound)
        normed_band = (band - (band).mean()) / (band.std())
        bandwidth = (upper_bound - lower_bound) / mean * 100
        normed_band = normed_band.fillna(method="bfill")
        bandwidth = bandwidth.fillna(method="bfill")
        return normed_band, bandwidth
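
    # Usage sketch (not in the original class): inspect the two outputs of bollinger() on a
    # synthetic price series; the data below is illustrative only.
    def _bollinger_example(self):
        prices = pd.Series(np.linspace(100, 120, 60))  # synthetic, steadily rising prices
        normed_band, bandwidth = self.bollinger(prices, window_n=20, k=2)
        # normed_band: z-scored position of the price inside the bands
        # bandwidth: band width as a percentage of the rolling mean
        return normed_band, bandwidth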

    def add_indicators(self,
                       prices,
                       window_n=5,
                       n_bins=5,
                       save_data=False,
                       symb=None):
        n_bins = self.nbins
        prices_new = prices.copy()

        def plot_data(df,
                      type,
                      title="Stock prices",
                      xlabel="Date",
                      ylabel="Value",
                      symb=None):
            """Plot stock prices with a custom title and meaningful axis labels."""
            ax = df.plot(title=title, fontsize=12)
            ax.set_xlabel(xlabel)
            ax.set_ylabel(ylabel)
            plt.savefig('{}_{}.png'.format(type, str(symb)))

        for symb in prices.columns:
            #bollinger bands here.
            bolli, bandwidth = self.bollinger(prices[symb], window_n=window_n)

            momentum = ((prices[symb] / prices[symb].shift(-1)) *
                        100).fillna(method="ffill")

            # rsi
            difference = prices[symb].diff()
            diff_up, diff_down = difference.copy(), difference.copy()
            diff_up[diff_up < 0] = 0
            diff_down[diff_down > 0] = 0
            rsi = 100. - (100. /
                          (1. +
                           (diff_up.rolling(window=window_n).mean() /
                            diff_down.rolling(window=window_n).mean().abs())))
            rsi = rsi.fillna(method="bfill")

            if save_data:
                print("Correlation between RSI and Momentum",
                      np.corrcoef(rsi, momentum))

                plot_data(bandwidth,
                          type='bandwidth',
                          title='Bandwidth Values')
                plt.clf()
                plot_data(bolli,
                          type='bollinger',
                          title='Normed Bollinger Values')
                plt.clf()
                plot_data(momentum, type='momentum', title='Momentum Values')
                plt.clf()
                plot_data(rsi, type='rsi', title='RSI Values')
                plt.clf()

            bolli = pd.cut(bolli, bins=n_bins, labels=False)
            bandwidth = pd.cut(bandwidth, bins=n_bins, labels=False)
            momentum = pd.cut(momentum, bins=n_bins, labels=False)
            rsi = pd.cut(rsi, bins=n_bins, labels=False)
        return np.asarray(bolli), np.asarray(bandwidth), np.asarray(
            momentum), np.asarray(rsi)

    def discretize(self, state_bolli, state_bandwidth, state_momentum,
                   state_rsi):
        return state_bolli + state_bandwidth ** 2 + state_momentum ** 3 + state_rsi ** 4
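
    # Observation (not from the original author): the power-sum above can map different bin
    # combinations to the same integer. A collision-free alternative is a positional
    # (base-nbins) encoding that exactly fills the nbins ** 4 states given to the QLearner:
    def discretize_positional(self, state_bolli, state_bandwidth, state_momentum,
                              state_rsi):
        n = self.nbins
        return ((state_bolli * n + state_bandwidth) * n + state_momentum) * n + state_rsi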

    def init_trades_and_prices_df(self, prices):
        prices_dataframe = prices.copy()
        prices_dataframe['Cash'] = np.ones(prices.shape[0])
        df_trades = pd.DataFrame(np.zeros(prices_dataframe.shape),
                                 index=prices_dataframe.index,
                                 columns=prices_dataframe.columns)
        return prices_dataframe, df_trades

    def compute_last_reward(self, prices, holdings, action, i):
        if i == 0:
            reward = 0
        else:
            # compute the daily return.
            daily_return = (
                (prices.iloc[i] - prices.iloc[i - 1]) / prices.iloc[i]) * 100
            if holdings != 0:
                # Sell
                if action == 0:
                    reward = -1.0 * daily_return
                # Buy
                elif action == 1:
                    reward = daily_return
                else:
                    reward = -9
            else:
                reward = 0
        return reward
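
    # Worked example for compute_last_reward (illustrative numbers, not project data): with
    # prices of 100 yesterday and 102 today, daily_return = (102 - 100) / 102 * 100 ~= 1.96;
    # while holding a position, buying (action 1) earns +1.96, selling (action 0) earns
    # -1.96, and any other action is penalised with -9. With no holdings the reward is 0.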

    def compute_trades_and_holdings_buy(self, df_trades, prices_dataframe,
                                        symbol, holdings, diggity_day):
        if holdings == -1000:
            df_trades["Cash"].loc[diggity_day] = df_trades["Cash"].loc[
                diggity_day] + 2000. * prices_dataframe[symbol].loc[
                    diggity_day] * -1.
            df_trades[symbol].loc[
                diggity_day] = df_trades[symbol].loc[diggity_day] + 2000.
            holdings = holdings + 2000.
        if holdings == 0:
            df_trades["Cash"].loc[diggity_day] = df_trades["Cash"].loc[
                diggity_day] + 1000. * prices_dataframe[symbol].loc[
                    diggity_day] * -1.
            df_trades[symbol].loc[
                diggity_day] = df_trades[symbol].loc[diggity_day] + 1000.
            holdings = holdings + 1000.
        return df_trades, holdings

    def compute_trades_and_holdings_sell(self, df_trades, prices_dataframe,
                                         symbol, holdings, diggity_day):
        if holdings == 1000:
            df_trades["Cash"].loc[diggity_day] = df_trades["Cash"].loc[
                diggity_day] + 2000.0 * prices_dataframe[symbol].loc[
                    diggity_day]
            df_trades[symbol].loc[
                diggity_day] = df_trades[symbol].loc[diggity_day] - 2000.
            holdings = holdings - 2000
        if holdings == 0:
            df_trades["Cash"].loc[diggity_day] = df_trades["Cash"].loc[
                diggity_day] + 1000. * prices_dataframe[symbol].loc[diggity_day]
            df_trades[symbol].loc[
                diggity_day] = df_trades[symbol].loc[diggity_day] - 1000.0
            holdings = holdings - 1000.

        return df_trades, holdings

    def compute_cumulative_return(self, prices, prices_dataframe, df_trades,
                                  sv):
        holdings_df = pd.DataFrame(np.zeros(df_trades.shape),
                                   columns=df_trades.columns,
                                   index=df_trades.index)
        values_df = holdings_df.copy()
        first_diggity_day = prices.index[0]
        holdings_df["Cash"].loc[first_diggity_day] = sv
        holdings_df.loc[first_diggity_day] = holdings_df.loc[
            first_diggity_day] + df_trades.loc[first_diggity_day]
        holdings_df = holdings_df.cumsum()
        values_df = (holdings_df * prices_dataframe)
        df_port_val = values_df.sum(axis=1)
        cumulative_return = (df_port_val.iloc[-1] - sv) / sv
        return cumulative_return

    # this method should create a QLearner, and train it for trading
    def addEvidence(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # add your code to do learning here

        # example usage of the old backward compatible util function
        syms = [symbol]
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data(syms, dates)  # automatically adds SPY
        prices = prices_all[syms]  # only portfolio symbols
        prices_SPY = prices_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(prices)

        # example use with new colname
        volume_all = ut.get_data(syms, dates,
                                 colname="Volume")  # automatically adds SPY
        volume = volume_all[syms]  # only portfolio symbols
        volume_SPY = volume_all['SPY']  # only SPY, for comparison later
        if self.verbose: print(volume)

        # feature engineering, get your new features for the state space.
        state_bolli, state_bandwidth, state_momentum, state_rsi = self.add_indicators(
            prices, save_data=self.save_data, symb=symbol)

        # compute the discretized state for every trading day in the range.
        states = self.discretize(state_bolli, state_bandwidth, state_momentum,
                                 state_rsi)
        number_of_bins = self.nbins
        self.learner = QLearner(num_states=(number_of_bins ** 4), \
                                num_actions = 3, \
                                alpha = 0.5, \
                                gamma = 0.9, \
                                rar = 0.0, \
                                radr = 0.0, \
                                dyna = 0, \
                                verbose = False)

        converged = False
        count = 0
        converged_yet = 0.

        while (not converged) and (count < 30):

            total_reward = 0
            prices_dataframe, df_trades = self.init_trades_and_prices_df(
                prices)
            holdings = 0

            for i in range(state_bolli.shape[0]):
                current_state = states[i]
                action = self.learner.querysetstate(current_state)
                # reward for the previous action (0 on the first day)
                reward = self.compute_last_reward(prices, holdings, action, i)
                # feed the current state and reward back in to get the next action
                action = self.learner.query(current_state, reward)
                diggity_day = prices.index[i]
                if action == 0:
                    df_trades, holdings = self.compute_trades_and_holdings_sell(
                        df_trades, prices_dataframe, symbol, holdings,
                        diggity_day)
                if action == 1:
                    df_trades, holdings = self.compute_trades_and_holdings_buy(
                        df_trades, prices_dataframe, symbol, holdings,
                        diggity_day)
                total_reward += reward

            cumulative_return = self.compute_cumulative_return(
                prices, prices_dataframe, df_trades, sv)
            count += 1

            if abs(converged_yet - cumulative_return) * 100. < 0.0001:
                converged = True
            else:
                converged_yet = cumulative_return

        return pd.DataFrame(df_trades[symbol])

    # this method should use the existing policy and test it against new data
    def testPolicy(self, symbol = "IBM", \
        sd=dt.datetime(2008,1,1), \
        ed=dt.datetime(2009,1,1), \
        sv = 10000):

        # here we build a fake set of trades
        # your code should return the same sort of data
        dates = pd.date_range(sd, ed)
        prices_all = ut.get_data([symbol], dates)  # automatically adds SPY
        prices = prices_all[[
            symbol,
        ]]  # only portfolio symbols
        trades_SPY = prices_all['SPY']  # only SPY, for comparison later
        """
        trades.values[:,:] = 0 # set them all to nothing
        trades.values[0,:] = 1000 # add a BUY at the start
        trades.values[40,:] = -1000 # add a SELL
        trades.values[41,:] = 1000 # add a BUY
        trades.values[60,:] = -2000 # go short from long
        trades.values[61,:] = 2000 # go long from short
        trades.values[-1,:] = -1000 #exit on the last day
        if self.verbose: print type(trades) # it better be a DataFrame!
        if self.verbose: print trades
        if self.verbose: print prices_all
        """
        prices_dataframe, df_trades = self.init_trades_and_prices_df(prices)
        holdings = 0
        state_bolli, state_bandwidth, state_momentum, state_rsi = self.add_indicators(
            prices)
        states = self.discretize(state_bolli, state_bandwidth, state_momentum,
                                 state_rsi)

        for i in range(state_bolli.shape[0]):
            current_state = states[i]
            action = self.learner.querysetstate(current_state)
            reward = self.compute_last_reward(prices, holdings, action, i)
            diggity_day = prices.index[i]
            if action == 0:
                df_trades, holdings = self.compute_trades_and_holdings_sell(
                    df_trades, prices_dataframe, symbol, holdings, diggity_day)
            if action == 1:
                df_trades, holdings = self.compute_trades_and_holdings_buy(
                    df_trades, prices_dataframe, symbol, holdings, diggity_day)
        return pd.DataFrame(df_trades[symbol])
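
# Usage sketch for the class above (assumptions: dt, pd, np and the course's util module are
# imported as in the original file, and price data for "IBM" is available to ut.get_data):
if __name__ == "__main__":
    sl = StrategyLearner(verbose=False, impact=0.0, save_data=False)
    sl.addEvidence(symbol="IBM", sd=dt.datetime(2008, 1, 1), ed=dt.datetime(2009, 1, 1), sv=10000)
    trades = sl.testPolicy(symbol="IBM", sd=dt.datetime(2009, 1, 1), ed=dt.datetime(2010, 1, 1), sv=10000)
    print(trades.head())  # one column of share trades (+buy / -sell), indexed by date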
Пример #26
0
def Qlearning_Otest():
    ''' Testing function for Q learning on O-Track'''
    q = QLearner(0.5, 0.9, 0.9, "O")
    q.track.show()
    q.train((3, 4, 0, 1))
    q.train((4, 3, 0, 1))
    q.train((20, 2, -1, 0))
    q.train((21, 4, -1, -1))
    q.train((20, 20, 0, -1))
    q.train((18, 22, 1, -1))
    q.train((4, 22, 1, 0))
    q.train((3, 20, 1, 1))
    q.train()
    for i in range(10):
        print(q.trial_run())
Example #27
0
        print('Total reward={r} over {n} iterations'.format(r=sum(rewards), n=iterations))
        plt.figure()
        plt.plot(range(1,iterations+1), reward_iter)
        plt.show()


if __name__ == '__main__':
    SDP = firemdp.solveMDP()
    print("Finite Horizon")
    firemdp.printPolicy(SDP.policy[:, 0])

    fm = ForestManagement()
    vi, vi_policy, _, _ = fm.value_iteration()
    pi, pi_policy, _, _ = fm.policy_iteration()

    print("Value Iteration")
    firemdp.printPolicy(vi_policy)
    # fm.eval_policy(vi_policy,iterations=1000)
    print("Policy Iteration")
    firemdp.printPolicy(pi_policy)
    # fm.eval_policy(pi_policy, iterations=1000)

    ql = QLearner(fm.name, fm.prob, fm.rewards)
    ql.q_learning_trials(trials=20, vi=vi, pi=pi)
    run_stats,ql_policy = ql.q_learning(gamma=0.9911, alpha=0.3695, alpha_decay=0.9998, alpha_min=0.0747,
                                        epsilon=0.8608, epsilon_decay=0.9996, n_iter=47366,returnStats=True)
    print("QLearning Policy")
    firemdp.printPolicy(ql_policy)
    # fm.eval_policy(ql_policy, iterations=1000)

    sys.exit()