def get_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData): current_portfolio_value = portfolio.get_value(stock_market_data) if self.last_portfolio_value < current_portfolio_value: return 100 * (current_portfolio_value / self.last_portfolio_value) elif self.last_portfolio_value > portfolio.get_value(stock_market_data): return -100 return -20
def get_order_list(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    """Translate the most recently chosen action into a list of orders.

    A positive action weight buys shares for that fraction of the cash,
    a negative weight liquidates the complete position of that company.
    """
    orders = []
    weights = self.actions[self.last_action]
    for slot, company in enumerate([Company.A, Company.B]):
        weight = weights[slot]
        last_price = stock_market_data[company].get_last()[-1]
        if weight > 0:
            # spend the given fraction of the cash on whole shares
            share_count = int(portfolio.cash * weight // last_price)
            orders.append(Order(OrderType.BUY, company, share_count))
        elif weight < 0:
            # negative weights in the action table are always -1, so there
            # is nothing to compute: dump the full position
            orders.append(
                Order(OrderType.SELL, company, portfolio.get_stock(company)))
    return orders
def test_update_sufficient_cash_reserve(self):
    """Orders within the cash reserve must be executed."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    depot = Portfolio(20000, {Company.A: 200})
    # Cash covers the trade volume, so the buy must go through.
    depot.update_with_order_list(market, [Order(OrderType.BUY, Company.A, 100)])
    self.assertEqual(depot.cash, 9724.0105)
    self.assertEqual(depot.stocks[Company.A], 300)
def test_update_no_sufficient_cash_reserve(self):
    """Orders exceeding the cash reserve must be skipped entirely."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    depot = Portfolio(0, {Company.A: 200})
    # No cash at all: the buy order must leave the portfolio untouched.
    depot.update_with_order_list(market, [Order(OrderType.BUY, Company.A, 100)])
    self.assertEqual(depot.cash, 0)
    self.assertEqual(depot.stocks[Company.A], 200)
def test_update_do_not_drop_below_cash_0(self):
    """Only as many orders are executed as the cash reserve allows."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    depot = Portfolio(110)
    # Each single order is affordable (most recent price of stock A is
    # 102.759895), but executing both would push the cash below zero.
    orders = [Order(OrderType.BUY, Company.A, 1),
              Order(OrderType.BUY, Company.A, 1)]
    depot.update_with_order_list(market, orders)
    self.assertEqual(depot.cash, 7.240105)
    self.assertEqual(depot.stocks[Company.A], 1)
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this traders
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    order_list = []
    vote_a = self.expert_a.vote(stock_market_data[Company.A])
    vote_b = self.expert_b.vote(stock_market_data[Company.B])

    # convert votes to state
    state = np.array([self.vote2num[vote_a], self.vote2num[vote_b]])

    if self.train_while_trading:
        if len(self.memory) > self.min_size_of_memory_before_training:
            # helper function which executes experience replay
            self._replay()

        # act
        # NOTE(review): _act receives order_list, so it presumably appends
        # the chosen orders in place — confirm; outside training mode the
        # returned order_list stays empty.
        action = self._act(state, stock_market_data, portfolio, order_list)

        if self.last_portfolio_value is not None:
            # Reward function R1: binary reward — 1 if the portfolio did
            # not lose value since the last call, 0 otherwise
            if self.last_portfolio_value <= portfolio.get_value(
                    stock_market_data):
                reward = 1
            else:
                reward = 0

            # Reward function R2 (disabled alternative: relative gain)
            #reward = (portfolio.get_value(
            #    stock_market_data) - self.last_portfolio_value) / self.last_portfolio_value

            # helper to fill up the queue for performance replay
            self._remember(self.last_state, action, reward, state)

    # save state and portfolio value for the next call of trade()
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_state = state
    return order_list
def gen_reward(self, portfolio: Portfolio, stock_market_data: StockMarketData): print('gen_reward') if self.last_portfolio_value < portfolio.get_value(stock_market_data): return self.reward_factor * (portfolio.get_value(stock_market_data) / self.last_portfolio_value) elif self.last_portfolio_value > portfolio.get_value( stock_market_data): return -self.reward_factor * ( portfolio.get_value(stock_market_data) / self.last_portfolio_value) else: return -self.reward_factor / 5
def test_get_value_with_date(self):
    """Portfolio valuation at a fixed date: cash plus that day's stock prices."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    valuation_date = Date(2012, 1, 3)
    # cash only
    self.assertEqual(Portfolio(100.0).get_value(market, valuation_date), 100.0)
    # cash plus one position
    depot = Portfolio(100.0, {Company.A: 10})
    self.assertEqual(depot.get_value(market, valuation_date), 455.54107999999997)
    # cash plus two positions
    depot = Portfolio(100.0, {Company.A: 10, Company.B: 10})
    self.assertEqual(depot.get_value(market, valuation_date), 2046.9924999999998)
def test_update_action_order_does_not_matter(self):
    """Applying the same orders in a different order yields identical portfolios."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    # two identically composed portfolios
    first = Portfolio(0, {Company.A: 100})
    second = Portfolio(0, {Company.A: 100})
    # same entries, opposite order: the 100 shares are sold in both cases,
    # while the buy is skipped because there is no cash available
    list_one = [Order(OrderType.BUY, Company.A, 50),
                Order(OrderType.SELL, Company.A, 100)]
    list_two = [Order(OrderType.SELL, Company.A, 100),
                Order(OrderType.BUY, Company.A, 50)]
    first.update_with_order_list(market, list_one)
    second.update_with_order_list(market, list_two)
    # both portfolios must end up equal
    self.assertEqual(first.cash, 10275.9895)
    self.assertEqual(first.cash, second.cash)
    self.assertEqual(first.stocks[Company.A], 0)
    self.assertEqual(first.stocks, second.stocks)
def __init__(self, expert_a, expert_b, stock_market_data: StockMarketData, portfolio: Portfolio): """ Constructor Args: expert_a : expert opinion from analyst A expert_b : expert opinion from analyst B portfolio : current Portfolio of this traders stock_market_data : StockMarketData for evaluation """ # get composition of current portfolio self.noStockA = portfolio.get_stock(Company.A) self.noStockB = portfolio.get_stock(Company.B) self.Cash = portfolio.cash # most important information: minimum cash neccessary to buy one addtional share: if set to 100 or any fixed value,the zig zag curve in the evalution set is occuring # when set to max no zigzag can be seen self.min_cash_to_buy = max( stock_market_data.get_most_recent_price(Company.A), stock_market_data.get_most_recent_price(Company.B)) # get votes from experts opinions company_list = stock_market_data.get_companies() for company in company_list: if company == Company.A: stock_data_a = stock_market_data[Company.A] vote_a = expert_a.vote(stock_data_a) elif company == Company.B: stock_data_b = stock_market_data[Company.B] vote_b = expert_b.vote(stock_data_b) else: assert False self.expertOpinionA = vote_a self.expertOpinionB = vote_b
def make_order(self, company: Company, orderTyp: OrderType, percentage,
               portfolio: Portfolio,
               stock_market_data: StockMarketData) -> Order:
    """
    Creates an Order for one company, or None when nothing can be traded.

    Args:
      company: the company for the order
      orderTyp: the OrderType (BUY or SELL)
      percentage: fraction of the cash (BUY) or of the position (SELL)
        to trade — docstring in the original called it "an integer
        indicating how much percent"; the BUY branch clearly uses it as a
        fraction of the cash
      portfolio: current Portfolio of this trader
      stock_market_data: StockMarketData for price lookup

    Returns:
      An Order instance, or None if the computed amount is not positive.
    """
    if orderTyp == OrderType.BUY:
        # spend `percentage` of the cash on whole shares at the latest price
        stock_price = stock_market_data.get_most_recent_price(company)
        port = portfolio.cash * percentage
        amount_to_buy = int(port // stock_price)
        logger.debug(
            f"{self.get_name()}: Got best action to buy {company}: and bought {amount_to_buy}"
        )
        return Order(OrderType.BUY, company,
                     amount_to_buy) if amount_to_buy > 0 else None
    elif orderTyp == OrderType.SELL:
        # sell `percentage` of the currently held position
        amount_to_sell = portfolio.get_stock(company)
        # NOTE(review): unlike the BUY branch there is no int() here, so a
        # fractional percentage produces a fractional sell amount — confirm
        # whether Order accepts non-integer amounts or percentage is always 1.
        amount_to_sell *= percentage
        logger.debug(
            f"{self.get_name()}: Got best action to sell {company}: and sold {amount_to_sell}"
        )
        return Order(OrderType.SELL, company,
                     amount_to_sell) if amount_to_sell > 0 else None
    else:
        # only BUY and SELL are valid order types here
        assert False
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate the orders for the current trading day.

    While training, the previous transition (state, action, reward, new
    state) is appended to the replay memory and the network is trained
    before the next action is chosen.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation

    Returns:
      A list of Order instances, may be empty but never None.
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # derive today's state from the market data
    current_state = get_state(self, stock_market_data)

    # learn from the previous step once there is one
    if self.train_while_trading and self.last_state is not None:
        reward = get_reward(self, portfolio, stock_market_data)
        self.memory.append(
            (self.last_state, self.last_action, reward, current_state))
        train_neural_net(self)

    # choose the next action and remember everything for the next call
    action_index = get_index_for_action_to_execute(self, current_state)
    self.last_state = current_state
    self.last_action = action_index
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    return get_order_list(self, portfolio, stock_market_data)
def __follow_expert_vote(self, company: Company, stock_data: StockData,
                         vote: Vote, buy_weight: float, portfolio: Portfolio,
                         order_list: List[Order]):
    """Append an order for *company* to *order_list* based on the expert vote.

    BUY and HOLD both trigger a buy with `buy_weight` of the cash; SELL
    liquidates the whole position. Writes into order_list in place.
    """
    assert company is not None
    assert stock_data is not None
    assert vote is not None
    assert portfolio is not None
    assert order_list is not None

    # NOTE(review): HOLD is deliberately(?) treated like BUY here — this
    # makes the final else branch (and its `assert vote == Vote.HOLD`)
    # unreachable for every Vote value; confirm that buying on HOLD is the
    # intended strategy of this trader.
    if vote is Vote.BUY or vote is Vote.HOLD:
        assert buy_weight is not None and 0 < buy_weight <= 1.0
        # spend buy_weight of the cash on whole shares at the latest price
        stock_price = stock_data.get_last()[-1]
        amount_to_buy = int(buy_weight * portfolio.cash // stock_price)
        logger.debug(
            f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
        )
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, company, amount_to_buy))
    elif vote == Vote.SELL:
        # sell as many stocks as possible
        amount_to_sell = portfolio.get_stock(company)
        logger.debug(
            f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
        )
        if amount_to_sell > 0:
            order_list.append(
                Order(OrderType.SELL, company, amount_to_sell))
    else:
        # do nothing (dead branch, see NOTE above)
        assert vote == Vote.HOLD
        logger.debug(f"{self.get_name()}: Got vote to hold {company}")
def get_orders(self, stock_market_data: StockMarketData, portfolio: Portfolio):
    """Build one order item per company from the weights of the last action."""
    result = []
    for slot, company in ((0, Company.A), (1, Company.B)):
        # latest closing price and the complete current position
        latest_price = stock_market_data[company].get_last()[-1]
        full_position = portfolio.get_stock(company)
        # weight of this company in the chosen action
        weight = self.actions[self.last_action][slot]
        # number of whole shares the weighted cash would buy
        buy_amount = int(weight * portfolio.cash // latest_price)
        result.append(
            self.get_order_item(weight, buy_amount, full_position, company))
    return result
def test_trader_no_stock(self):
    """Without any companies on the market the trader must emit no orders."""
    trader = BuyAndHoldTrader('test_color', 'test_name')
    empty_market = StockMarketData([], [Period.TESTING])
    orders = trader.trade(Portfolio(1000), empty_market)
    self.assertIsNotNone(orders)
    self.assertEqual(len(orders), 0)
def test_create_portfolio(self):
    """Portfolio construction: empty, cash only, and cash with stocks."""
    # no arguments -> empty portfolio
    empty = Portfolio()
    self.assertIsNotNone(empty)
    self.assertEqual(empty.cash, 0)
    self.assertEqual(empty.stocks, {})
    # cash only
    cash_only = Portfolio(1000.0)
    self.assertIsNotNone(cash_only)
    self.assertEqual(cash_only.cash, 1000.0)
    self.assertEqual(cash_only.stocks, {})
    # cash and stocks
    filled = Portfolio(1000.0, {Company.A: 10, Company.B: 50})
    self.assertIsNotNone(filled)
    self.assertEqual(filled.cash, 1000.0)
    self.assertEqual(len(filled.stocks.keys()), 2)
    self.assertEqual(filled.stocks[Company.A], 10)
    self.assertEqual(filled.stocks[Company.B], 50)
def test_trade_one_stock(self):
    """A buy-and-hold trader spends its cash on the single listed stock."""
    trader = BuyAndHoldTrader('test_color', 'test_name')
    market = StockMarketData([Company.A], [Period.TESTING])
    orders = trader.trade(Portfolio(1000), market)
    self.assertIsNotNone(orders)
    self.assertEqual(len(orders), 1)
    only_order = orders[0]
    self.assertEqual(only_order.type, OrderType.BUY)
    self.assertEqual(only_order.company, Company.A)
    self.assertEqual(only_order.amount, 9)
def run(self, data: StockMarketData, traders: List[ITrader],
        offset: int = 0) -> Dict[ITrader, Dict[Date, Portfolio]]:
    """
    Runs the stock exchange over the given stock market data for the given traders.
    :param data: The complete stock market data
    :param traders: A list of all traders
    :param offset: The number of trading days which a will be skipped before (!) trading starts
    :return: The main data structure, which stores one portfolio per trade day, for each traders
    """
    assert data is not None
    assert traders is not None

    # initialize the main data structure: Dictionary over traders, that stores
    # each trader's portfolio per day
    # data structure type is Dict[ITrader, Dict[Date, Portfolio]]
    trade_dates = data.get_trade_days()
    assert trade_dates  # must not be empty
    assert 0 <= offset < len(trade_dates)  # offset must be feasible
    self.__complete_stock_market_data = data
    self.__trader_portfolios = {
        trader: {trade_dates[offset]: Portfolio(self.__cash)}
        for trader in traders
    }

    # iterate over all trade days minus 1, because we don't trade on the last day
    for tick in range(offset, len(trade_dates) - 1):
        logger.debug(
            f"Stock Exchange: Current tick '{tick}' means today is '{trade_dates[tick]}'"
        )
        # fix: progress marker roughly once per year was a stray print()
        # left over from debugging — route it through the logger instead
        if tick % 365 == 1:
            logger.info(f"Stock Exchange: Trading day {trade_dates[tick]}")

        # build stock market data until today
        current_stock_market_data = data.deepcopy_first_n_items(tick + 1)

        # iterate over all traders
        for trader in traders:
            # get the trader's order list by giving him a copy (to prevent
            # cheating) of today's portfolio
            todays_portfolio = self.__trader_portfolios[trader][
                trade_dates[tick]]
            current_order_list = trader.trade(
                copy.deepcopy(todays_portfolio), current_stock_market_data)

            # execute order list and save the result as tomorrow's portfolio
            tomorrows_portfolio = copy.deepcopy(todays_portfolio)
            tomorrows_portfolio.update_with_order_list(
                current_stock_market_data, current_order_list)
            self.__trader_portfolios[trader][
                trade_dates[tick + 1]] = tomorrows_portfolio

    return self.__trader_portfolios
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # calculate current state
    state_ = state_maker(self.expert_a, self.expert_b, stock_market_data,
                         portfolio)
    curr_state = state_.create_numerical_state(self.state_size)

    # calculate current portfolio value
    curr_portfolio_value = portfolio.get_value(stock_market_data)

    # training vs. testing mode (fix: idiomatic truthiness check instead
    # of the non-idiomatic '== True' comparison)
    if self.train_while_trading:
        # Store state as experience (memory) and train the neural network
        # only if trade() was called before at least once
        if self.last_state is not None:
            reward = self.get_rewards(self.last_portfolio_value,
                                      curr_portfolio_value)
            self.remember(self.last_state, self.last_action_a, reward,
                          curr_state)
            if len(self.memory) > self.min_size_of_memory_before_training:
                self.replay()

        # Create actions for current state and decrease epsilon for fewer
        # random actions
        curr_action_a = self.get_best_action(curr_state)
        final_order = self.create_order_list(curr_action_a, portfolio,
                                             stock_market_data)
        self.decrease_epsilon()
    else:
        # predict the best action from the neural net
        curr_action_a = self.model.predict(curr_state)
        curr_action_a = np.argmax(curr_action_a[0])
        final_order = self.create_order_list(curr_action_a, portfolio,
                                             stock_market_data)

    # Save created state, action and portfolio value for the next call of
    # trade() — done
    self.last_state = curr_state
    self.last_action_a = curr_action_a
    self.last_portfolio_value = curr_portfolio_value
    return final_order
def test_trade_vote_down_stock_a(self):
    """On falling prices the trusting trader sells both positions completely."""
    trader = TrustingTrader(PerfectExpert(Company.A), PerfectExpert(Company.B),
                            'test_color', 'test_name')
    depot = Portfolio(1000.0, {Company.A: 10, Company.B: 10})
    market = StockMarketData([Company.A, Company.B],
                             [Period.TESTING]).deepcopy_first_n_items(4)
    orders = trader.trade(depot, market)
    self.assertIsNotNone(orders)
    self.assertEqual(len(orders), 2)
    # both positions are liquidated, in company order A then B
    for order, company in zip(orders, (Company.A, Company.B)):
        self.assertEqual(order.type, OrderType.SELL)
        self.assertEqual(order.company, company)
        self.assertEqual(order.amount, 10.0)
def create_reward(self, portfolio: Portfolio,
                  stock_market_data: StockMarketData, state_now: State):
    """Build the Q-learning target vector for the last executed action.

    The reward for the last action is +1 on a portfolio gain, -1 on a loss
    and 0 when unchanged; the discounted Q-values of the follow-up state
    are added element-wise.

    Returns:
        A length-9 numpy array: one-hot reward plus gamma * Q(state_now).
    """
    new_portfolio_value = portfolio.get_value(stock_market_data)
    index_of_action = Action.get_id_from_action(self.last_action)
    # +1 for a gain, -1 for a loss, 0 when the value did not change
    reward = -1 if (self.last_portfolio_value - new_portfolio_value) > 0 else 1
    reward = 0 if (self.last_portfolio_value - new_portfolio_value) == 0 else reward
    # one-hot reward vector over the 9 possible actions
    reward_array = np.zeros([9])
    reward_array[index_of_action] = reward
    # discounted Q-value estimate of the follow-up state
    q_next = self.run_model(state_now)
    weighted_q_next = q_next * self.gamma
    # element-wise sum of reward and discounted future estimate
    reward_array = np.sum([reward_array, weighted_q_next], axis=0)
    # disabled alternative: only update the taken action's entry
    #reward_array[index_of_action] += self.gamma * q_next[index_of_action]
    return reward_array
def __follow_action(self, company: Company, stock_data: StockData, vote: Vote,
                    portfolio: Portfolio, order_list: List[Order]):
    """
    Protected helper method to calculate amount of stocks to be bought and sold.
    :param company: Company
    :param stock_data: StockData
    :param vote: Vote
    :param portfolio: Portfolio
    :param order_list: List[Order]
    :return: None (writes result to order_list)
    """
    assert company is not None
    assert stock_data is not None
    assert vote is not None
    assert portfolio is not None
    assert order_list is not None

    if vote == Vote.BUY:
        # invest all available cash into whole shares at the latest price
        stock_price = stock_data.get_last()[-1]
        amount_to_buy = int(portfolio.cash // stock_price)
        logger.debug(
            f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
        )
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        return

    if vote == Vote.SELL:
        # liquidate the complete position
        amount_to_sell = portfolio.get_stock(company)
        logger.debug(
            f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
        )
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        return

    # anything else must be HOLD -> no order
    assert vote == Vote.HOLD
    logger.debug(f"{self.get_name()}: Got vote to hold {company}")
def __create_order_for_company(
        self, company: Company, portfolio: Portfolio, vote: Vote,
        stock_market_data: StockMarketData) -> Order:
    """Create a single order for *company* from the expert vote.

    SELL liquidates the whole position; BUY spends the cash (half of it
    when both experts are of the same type, since both companies are then
    bought). Returns None when there is nothing to trade.
    """
    if vote == Vote.SELL:
        held = portfolio.get_stock(company)
        return Order(OrderType.SELL, company, held) if held > 0 else None
    if vote == Vote.BUY:
        stock_price = stock_market_data.get_most_recent_price(company)
        if self.type_a == self.type_b:
            # identical expert types -> buy both, half the cash for each
            affordable = int((portfolio.cash // 2) // stock_price)
        else:
            affordable = int(portfolio.cash // stock_price)
        return Order(OrderType.BUY, company,
                     affordable) if affordable > 0 else None
    # HOLD (or anything else): no order
    return None
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """Produce the orders for the current trading day.

    While training, the previous step's experience is stored and the model
    is trained before the next action is chosen.
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # assemble the network input from expert votes and both stock histories
    nn_state = State(portfolio,
                     self.expert_a.vote(stock_market_data[Company.A]),
                     self.expert_b.vote(stock_market_data[Company.B]),
                     stock_market_data[Company.A],
                     stock_market_data[Company.B])
    nn_input = nn_state.get_nn_input_state()

    # learn from the previous step once there is a previous step
    if self.train_while_trading and self.last_state is not None:
        step_reward = self.gen_reward(portfolio, stock_market_data)
        self.update_memory(step_reward, nn_input)
        self.train_model()

    # remember everything needed for the next call of trade()
    self.last_action = self.get_action_idx(nn_input)
    self.last_state = nn_input
    self.last_portfolio_value = portfolio.get_value(stock_market_data)

    return self.get_orders(stock_market_data, portfolio)
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # derive today's state and valuation
    state_today = self.current_state(stock_market_data)
    value_today = portfolio.get_value(stock_market_data)

    # learn from the previous step: store the transition and replay once
    # enough experience has accumulated
    if self.last_action is not None and self.train_while_trading:
        step_reward = self.reward(value_today)
        self.memory.append(
            (self.last_state, self.last_action, step_reward, state_today))
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experience_replay()

    # choose the next action and decay epsilon towards its minimum
    chosen_action = self.get_action(state_today)
    self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

    # keep state, action and value for the next call of trade()
    self.last_state = state_today
    self.last_action = chosen_action
    self.last_portfolio_value = value_today

    # translate the abstract action into concrete orders
    return self.mapping_action_order(chosen_action, portfolio,
                                     stock_market_data)
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this traders
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 1 (buy or sell A?)
    # output layer: 2 ([buy_A, sell_A])

    # TODO Compute the current state
    # state = combined expert votes per company (both experts vote on both)
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # do action 0 or 1?
    predictions = self.model.predict(state)
    #print(f'predictions:{predictions}')
    #input()
    # first two outputs decide A, last two decide B
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    most_recent_price_A = stock_market_data.get_most_recent_price(Company.A)
    most_recent_price_B = stock_market_data.get_most_recent_price(Company.B)
    order_list = []
    money_to_spend = portfolio.cash

    # do stuff for A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // most_recent_price_A
        if amount_to_buy > 0:
            # reserve the spent cash so the B branch cannot overspend
            money_to_spend -= amount_to_buy * most_recent_price_A
            order_list.append(
                Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(
                Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # do stuff for B
    if action_B == 0:
        # buy all B (with whatever cash the A branch left over)
        amount_to_buy = money_to_spend // most_recent_price_B
        if amount_to_buy > 0:
            order_list.append(
                Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(
                Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: relative value change since last call is the reward for
        # the taken actions, its negation for the untaken ones
        diff = (portfolio.get_value(stock_market_data) /
                self.last_portfolio_value - 1)
        rec_vec = np.array([[-diff, -diff, -diff, -diff]])
        rec_vec[0][self.last_action_a] = diff
        rec_vec[0][2 + self.last_action_b] = diff
        #reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, rec_vec)

    # save state, actions and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    return order_list
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this traders
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 1 (buy or sell A?)
    # output layer: 2 ([buy_A, sell_A])

    # TODO Compute the current state
    # state = one vote per company (expert A votes on A, expert B on B)
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA],
        self.vote_map[expertB_voteB],
    ]])

    # do action 0 or 1?
    predictions = self.model.predict(state)

    # TODO Create actions for current state and decrease epsilon for fewer random actions
    if random.random() < self.epsilon:
        # use random actions for A and B (exploration)
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use prediction actions (exploitation)
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # do stuff for A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            # reserve the spent cash so the B branch cannot overspend
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(
                Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(
                Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # do stuff for B
    if action_B == 0:
        # buy all B (with whatever cash the A branch left over)
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(
                Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(
                Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    # TODO train the neural network only if trade() was called before at least once
    if self.last_state is not None:
        # train: per-stock relative price change is the reward signal,
        # plus the discounted best predicted future reward per stock
        diff_a = (current_price_a / self.last_price_a - 1)
        diff_b = (current_price_b / self.last_price_b - 1)
        fut_reward_a = np.max(predictions[0][0:2])
        fut_reward_b = np.max(predictions[0][2:4])
        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a,
            -diff_a + self.gamma * fut_reward_a,
            diff_b + self.gamma * fut_reward_b,
            -diff_b + self.gamma * fut_reward_b
        ]])

        # TODO Store state as experience (memory) and replay
        # slides: <s, a, r, s'>
        # mine: <s, r>
        if self.min_size_of_memory_before_training <= len(self.memory):
            # take self.batch_size - 1 from memory
            batch = random.sample(self.memory, self.batch_size - 1)
            # append current state, reward
            batch.append((self.last_state, reward_vec))
            for x, y in batch:
                self.model.fit(x, y, batch_size=self.batch_size, verbose=0)
        else:
            # only train with current (state, reward)
            self.model.fit(self.last_state, reward_vec, batch_size=1,
                           verbose=0)
        self.memory.append((self.last_state, reward_vec))

    # TODO Save created state, actions and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b
    return order_list
def test_get_value_without_date(self):
    """Portfolio valuation defaults to the most recent prices."""
    market = StockMarketData([Company.A, Company.B], [Period.TESTING])
    # empty portfolio is worth nothing
    self.assertEqual(Portfolio().get_value(market), 0)
    # cash only
    self.assertEqual(Portfolio(100.0).get_value(market), 100.0)
    # cash plus one position
    self.assertEqual(
        Portfolio(100.0, {Company.A: 10}).get_value(market), 1127.59895)
    # cash plus two positions
    self.assertEqual(
        Portfolio(100.0, {Company.A: 10, Company.B: 10}).get_value(market),
        2416.5398400000004)
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this traders
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # 1. Compute current state
    state = self.compute_state(stock_market_data)

    # 1.2 If training is turned off, just predict the next action and return orders
    if not self.train_while_trading:
        self.last_state = state
        actionSpace = self.model.predict(state)
        action = np.argmax(actionSpace[0])
        orders = self.action_to_order(action, portfolio, stock_market_data)
        return orders

    # 2. Get a random action with the probability of epsilon, otherwise
    # predict the action via the ANN
    # NOTE(review): the `and self.train_while_trading` is redundant here —
    # the early return above guarantees it is True at this point.
    if np.random.rand() <= self.epsilon and self.train_while_trading:
        action = np.random.randint(self.action_size, size=1)[0]
    else:
        actionSpace = self.model.predict(state)
        action = np.argmax(actionSpace[0])

    # 3. Reduce Epsilon if it is bigger than epsilon min
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # 4. Training of the ANN (only if trade() was called before at least once)
    if self.train_while_trading and self.last_state is not None:
        # 4.1 Get reward for the transition from the last portfolio value
        reward = self.get_reward(portfolio.get_value(stock_market_data),
                                 self.last_portfolio_value)
        # 4.2 Store memory
        self.memory.append(
            [self.last_state, self.last_action, reward, state])
        # 4.3 Actual training via Experience Replay
        if len(self.memory) > self.min_size_of_memory_before_training:
            self.experienceReplay(self.batch_size)

    # 5. Map Action + Create Order
    orders = self.action_to_order(action, portfolio, stock_market_data)

    # 6. Save the values for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    return orders
def trade(self, portfolio: Portfolio,
          stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Args:
      portfolio : current Portfolio of this traders
      stock_market_data : StockMarketData for evaluation

    Returns:
      A OrderList instance, may be empty never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # INPUT layer: 1 (buy or sell A?)
    # output layer: 2 ([buy_A, sell_A])

    # TODO Compute the current state
    # state = combined expert votes per company (both experts vote on both)
    stock_data_A = stock_market_data[Company.A]
    expertA_voteA = self.expert_a.vote(stock_data_A)
    expertB_voteA = self.expert_b.vote(stock_data_A)
    stock_data_B = stock_market_data[Company.B]
    expertA_voteB = self.expert_a.vote(stock_data_B)
    expertB_voteB = self.expert_b.vote(stock_data_B)

    state = np.array([[
        self.vote_map[expertA_voteA] + self.vote_map[expertB_voteA],
        self.vote_map[expertA_voteB] + self.vote_map[expertB_voteB],
    ]])

    # do action 0 or 1?
    predictions = self.model.predict(state)
    # disabled epsilon-greedy exploration (kept as in the original source)
    '''
    if random.random() < self.epsilon:
        # use random actions for A and B
        action_A = random.randrange(2)
        action_B = random.randrange(2)
    else:
        # use prediction actions
        action_A = np.argmax(predictions[0][0:2])
        action_B = np.argmax(predictions[0][2:4])
    '''
    action_A = np.argmax(predictions[0][0:2])
    action_B = np.argmax(predictions[0][2:4])

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)
    money_to_spend = portfolio.cash
    order_list = []

    # do stuff for A
    if action_A == 0:
        # buy all A
        amount_to_buy = money_to_spend // current_price_a
        if amount_to_buy > 0:
            # reserve the spent cash so the B branch cannot overspend
            money_to_spend -= amount_to_buy * current_price_a
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action_A == 1:
        # sell all A
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        assert False

    # do stuff for B
    if action_B == 0:
        # buy all B (with whatever cash the A branch left over)
        amount_to_buy = money_to_spend // current_price_b
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.B, amount_to_buy))
    elif action_B == 1:
        # sell all B
        amount_to_sell = portfolio.get_stock(Company.B)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.B, amount_to_sell))
    else:
        assert False

    if self.last_state is not None:
        # train: per-stock relative price change is the reward signal; the
        # discounted future reward per output is clamped at zero
        diff_a = (current_price_a / self.last_price_a - 1)
        diff_b = (current_price_b / self.last_price_b - 1)
        fut_reward_a_buy = np.max(predictions[0][0])
        fut_reward_a_buy = fut_reward_a_buy if fut_reward_a_buy > 0 else 0
        fut_reward_a_sell = np.max(predictions[0][1])
        fut_reward_a_sell = fut_reward_a_sell if fut_reward_a_sell > 0 else 0
        fut_reward_b_buy = np.max(predictions[0][2])
        fut_reward_b_buy = fut_reward_b_buy if fut_reward_b_buy > 0 else 0
        fut_reward_b_sell = np.max(predictions[0][3])
        fut_reward_b_sell = fut_reward_b_sell if fut_reward_b_sell > 0 else 0
        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a_buy,
            -diff_a + self.gamma * fut_reward_a_sell,
            diff_b + self.gamma * fut_reward_b_buy,
            -diff_b + self.gamma * fut_reward_b_sell
        ]])
        #reward_vec = np.array([[portfolio.get_value(stock_market_data)]])
        self.model.fit(self.last_state, reward_vec, verbose=0)

    # save state, actions, prices and portfolio value for the next call
    self.last_state = state
    self.last_action_a = action_A
    self.last_action_b = action_B
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b
    return order_list