def get_order_list(self, portfolio: Portfolio, stock_market_data: StockMarketData):
    """
    Translate the currently selected action pair into a list of orders.

    ``self.actions[self.last_action]`` is read as a pair of weights, index 0
    for Company.A and index 1 for Company.B:

    * weight > 0: buy ``int(cash * weight // latest price)`` shares
    * weight < 0: sell the complete position (the magnitude is ignored)
    * weight == 0: no order for that company

    Args:
        portfolio: supplies the available cash and the held shares
        stock_market_data: supplies the latest price per company

    Returns:
        The list of orders, company A first; may be empty.
    """
    # Both prices are looked up before any order is created.
    latest_prices = (
        stock_market_data[Company.A].get_last()[-1],
        stock_market_data[Company.B].get_last()[-1],
    )

    orders = []
    for slot, (company, price) in enumerate(zip((Company.A, Company.B), latest_prices)):
        weight = self.actions[self.last_action][slot]
        if weight > 0:
            # Buy orders are appended even when the computed share count is 0.
            shares = int(portfolio.cash * weight // price)
            orders.append(Order(OrderType.BUY, company, shares))
        elif weight < 0:
            # Any negative weight means "sell everything", no amount calculation.
            orders.append(
                Order(OrderType.SELL, company, portfolio.get_stock(company)))
    return orders
def __follow_expert_vote(self, company: Company, stock_data: StockData, vote: Vote,
                         buy_weight: float, portfolio: Portfolio, order_list: List[Order]):
    """
    Turn one expert vote into at most one order appended to ``order_list``.

    :param company: Company the vote refers to
    :param stock_data: StockData whose last entry provides the current price
    :param vote: Vote of the expert
    :param buy_weight: fraction of the cash to spend on a buy, must be in (0, 1]
    :param portfolio: Portfolio providing cash and held shares
    :param order_list: List[Order] that receives the resulting order
    :return: None (the result is written to order_list)
    """
    for required in (company, stock_data, vote, portfolio, order_list):
        assert required is not None

    # NOTE(review): HOLD is handled exactly like BUY here, so the final
    # "hold" section below is only reached by votes that are neither BUY,
    # HOLD nor SELL - and then its assertion fails. Confirm this is intended.
    if vote is Vote.BUY or vote is Vote.HOLD:
        assert buy_weight is not None and 0 < buy_weight <= 1.0
        stock_price = stock_data.get_last()[-1]
        amount_to_buy = int(buy_weight * portfolio.cash // stock_price)
        logger.debug(
            f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
        )
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        return

    if vote == Vote.SELL:
        # Sell the complete position.
        amount_to_sell = portfolio.get_stock(company)
        logger.debug(
            f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
        )
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        return

    # Nothing to do.
    assert vote == Vote.HOLD
    logger.debug(f"{self.get_name()}: Got vote to hold {company}")
def __init__(self, expert_a, expert_b, stock_market_data: StockMarketData, portfolio: Portfolio):
    """
    Constructor

    Args:
        expert_a : expert opinion from analyst A
        expert_b : expert opinion from analyst B
        portfolio : current Portfolio of this trader
        stock_market_data : StockMarketData for evaluation
    """
    # Snapshot of what the portfolio currently holds.
    self.noStockA = portfolio.get_stock(Company.A)
    self.noStockB = portfolio.get_stock(Company.B)
    self.Cash = portfolio.cash

    # Minimum cash needed to be able to buy one more share of either company,
    # i.e. the higher of the two most recent prices. Per the original author's
    # note, a fixed value (e.g. 100) produced a zig-zag curve on the
    # evaluation set, which did not show up when using the maximum.
    latest_price_a = stock_market_data.get_most_recent_price(Company.A)
    latest_price_b = stock_market_data.get_most_recent_price(Company.B)
    self.min_cash_to_buy = max(latest_price_a, latest_price_b)

    # Ask each expert about "its" company; any other company is rejected.
    for listed_company in stock_market_data.get_companies():
        if listed_company == Company.A:
            opinion_a = expert_a.vote(stock_market_data[Company.A])
            continue
        if listed_company == Company.B:
            opinion_b = expert_b.vote(stock_market_data[Company.B])
            continue
        assert False

    self.expertOpinionA = opinion_a
    self.expertOpinionB = opinion_b
def make_order(self, company: Company, orderTyp: OrderType, percentage,
               portfolio: Portfolio, stock_market_data: StockMarketData) -> Order:
    """
    Create a single order for one company.

    Args:
        company: the company for the order
        orderTyp: OrderType.BUY or OrderType.SELL
        percentage: multiplier applied to the available cash (BUY) or to the
            held shares (SELL); the code uses it as a plain factor, so 1
            means "everything"
        portfolio: supplies cash and held shares
        stock_market_data: supplies the most recent price for a BUY

    Returns:
        An Order with a whole-number share amount, or None when the
        resulting amount is not positive.

    Raises:
        AssertionError: if orderTyp is neither BUY nor SELL.
    """
    if orderTyp == OrderType.BUY:
        stock_price = stock_market_data.get_most_recent_price(company)
        budget = portfolio.cash * percentage
        amount_to_buy = int(budget // stock_price)
        logger.debug(
            f"{self.get_name()}: Got best action to buy {company}: and bought {amount_to_buy}"
        )
        return Order(OrderType.BUY, company, amount_to_buy) if amount_to_buy > 0 else None

    if orderTyp == OrderType.SELL:
        # Truncate to whole shares, mirroring the BUY branch; without this a
        # fractional percentage produced a fractional share amount.
        amount_to_sell = int(portfolio.get_stock(company) * percentage)
        logger.debug(
            f"{self.get_name()}: Got best action to sell {company}: and sold {amount_to_sell}"
        )
        return Order(OrderType.SELL, company, amount_to_sell) if amount_to_sell > 0 else None

    # Raised explicitly so the check also fires when asserts are disabled (-O).
    raise AssertionError(f"unsupported order type: {orderTyp}")
def get_orders(self, stock_market_data: StockMarketData, portfolio: Portfolio):
    """
    Build one order item per company from the currently selected action pair.

    For Company.A (weight index 0) and Company.B (weight index 1) this reads
    the latest price, the held shares and the weight from
    ``self.actions[self.last_action]``, derives
    ``int(weight * cash // price)`` and hands all of it to
    ``self.get_order_item``.

    Args:
        stock_market_data: supplies the latest price per company
        portfolio: supplies cash and held shares

    Returns:
        A two-element list holding whatever ``get_order_item`` returned for
        A and B, in that order.
    """
    collected = []
    for slot, company in enumerate((Company.A, Company.B)):
        latest_price = stock_market_data[company].get_last()[-1]
        shares_held = portfolio.get_stock(company)
        weight = self.actions[self.last_action][slot]
        shares_for_weight = int(weight * portfolio.cash // latest_price)
        collected.append(
            self.get_order_item(weight, shares_for_weight, shares_held, company))
    return collected
def __follow_action(self, company: Company, stock_data: StockData, vote: Vote,
                    portfolio: Portfolio, order_list: List[Order]):
    """
    Helper that converts a vote into the amount of stocks to buy or sell.

    BUY spends all available cash, SELL liquidates the whole position and
    HOLD only logs. Orders are appended only for positive amounts.

    :param company: Company
    :param stock_data: StockData
    :param vote: Vote
    :param portfolio: Portfolio
    :param order_list: List[Order]
    :return: None (writes result to order_list)
    """
    for required in (company, stock_data, vote, portfolio, order_list):
        assert required is not None

    if vote == Vote.BUY:
        # As many shares as the cash can pay for at the latest price.
        stock_price = stock_data.get_last()[-1]
        amount_to_buy = int(portfolio.cash // stock_price)
        logger.debug(
            f"{self.get_name()}: Got vote to buy {company}: {amount_to_buy} shares a {stock_price}"
        )
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        return

    if vote == Vote.SELL:
        # Everything currently held.
        amount_to_sell = portfolio.get_stock(company)
        logger.debug(
            f"{self.get_name()}: Got vote to sell {company}: {amount_to_sell} shares available"
        )
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        return

    # Only HOLD is left; anything else is a programming error.
    assert vote == Vote.HOLD
    logger.debug(f"{self.get_name()}: Got vote to hold {company}")
def __create_order_for_company(
        self, company: Company, portfolio: Portfolio, vote: Vote,
        stock_market_data: StockMarketData) -> Order:
    """
    Create the order that follows ``vote`` for ``company``.

    SELL sells every held share. BUY spends the available cash at the most
    recent price; when ``self.type_a == self.type_b`` only half of the cash
    is budgeted (per the original comment: both companies get bought, half
    each). Any other vote creates nothing.

    Returns:
        The Order, or None if there is nothing to trade.
    """
    if vote == Vote.SELL:
        shares_held = portfolio.get_stock(company)
        return Order(OrderType.SELL, company, shares_held) if shares_held > 0 else None

    if vote == Vote.BUY:
        latest_price = stock_market_data.get_most_recent_price(company)
        if self.type_a == self.type_b:
            budget = portfolio.cash // 2
        else:
            budget = portfolio.cash
        affordable = int(budget // latest_price)
        return Order(OrderType.BUY, company, affordable) if affordable > 0 else None

    return None
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    The state is the pair of expert votes (expert A on company A, expert B on
    company B) mapped through ``self.vote_map``. The model output is read as
    ``[buy_A, sell_A, buy_B, sell_B]``; per company the larger value wins,
    unless an epsilon-random action is drawn. After building the orders, the
    model is trained on the previous state using the relative price change
    since the previous call.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Current state from the two expert votes.
    vote_a = self.expert_a.vote(stock_market_data[Company.A])
    vote_b = self.expert_b.vote(stock_market_data[Company.B])
    state = np.array([[self.vote_map[vote_a], self.vote_map[vote_b]]])

    predictions = self.model.predict(state)

    # Epsilon-greedy choice: 0 = buy, 1 = sell, independently per company.
    if random.random() < self.epsilon:
        action_a = random.randrange(2)
        action_b = random.randrange(2)
    else:
        action_a = int(np.argmax(predictions[0][0:2]))
        action_b = int(np.argmax(predictions[0][2:4]))
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)

    # Company A is served first; whatever cash it leaves is available to B.
    money_to_spend = portfolio.cash
    order_list = []
    for company, action, price in ((Company.A, action_a, current_price_a),
                                   (Company.B, action_b, current_price_b)):
        if action == 0:
            # Whole shares only: floor division on float cash yields a float.
            amount_to_buy = int(money_to_spend // price)
            if amount_to_buy > 0:
                money_to_spend -= amount_to_buy * price
                order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        elif action == 1:
            amount_to_sell = portfolio.get_stock(company)
            if amount_to_sell > 0:
                order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        else:
            raise AssertionError(f"unexpected action {action} for {company}")

    # Train only if trade() was called at least once before.
    if self.last_state is not None:
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1
        fut_reward_a = np.max(predictions[0][0:2])
        fut_reward_b = np.max(predictions[0][2:4])
        # Buying is rewarded by a price rise, selling by a price drop; the
        # discounted best prediction for the current state is added to both.
        reward_vec = np.array([[
            diff_a + self.gamma * fut_reward_a,
            -diff_a + self.gamma * fut_reward_a,
            diff_b + self.gamma * fut_reward_b,
            -diff_b + self.gamma * fut_reward_b,
        ]])

        # Memory entries are (state, reward vector) pairs.
        if self.min_size_of_memory_before_training <= len(self.memory):
            # Never request more samples than the memory holds;
            # random.sample raises ValueError otherwise.
            replay_count = max(0, min(self.batch_size - 1, len(self.memory)))
            batch = random.sample(self.memory, replay_count)
            batch.append((self.last_state, reward_vec))
            for x, y in batch:
                self.model.fit(x, y, batch_size=self.batch_size, verbose=0)
        else:
            # Not enough experience yet: train on the newest pair only.
            self.model.fit(self.last_state, reward_vec, batch_size=1, verbose=0)
        self.memory.append((self.last_state, reward_vec))

    # Remember everything the next call needs.
    self.last_state = state
    self.last_action_a = action_a
    self.last_action_b = action_b
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b
    return order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    The state has one entry per company: the sum of both experts' votes on
    that company, each mapped through ``self.vote_map``. The model output is
    read as ``[buy_A, sell_A, buy_B, sell_B]`` and the larger value per
    company decides (no random exploration). Afterwards the model is fitted
    on the previous state using the relative price change since then.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Current state: both experts vote on both stocks.
    stock_data_a = stock_market_data[Company.A]
    expert_a_on_a = self.expert_a.vote(stock_data_a)
    expert_b_on_a = self.expert_b.vote(stock_data_a)
    stock_data_b = stock_market_data[Company.B]
    expert_a_on_b = self.expert_a.vote(stock_data_b)
    expert_b_on_b = self.expert_b.vote(stock_data_b)
    state = np.array([[
        self.vote_map[expert_a_on_a] + self.vote_map[expert_b_on_a],
        self.vote_map[expert_a_on_b] + self.vote_map[expert_b_on_b],
    ]])

    predictions = self.model.predict(state)
    # 0 = buy, 1 = sell
    action_a = int(np.argmax(predictions[0][0:2]))
    action_b = int(np.argmax(predictions[0][2:4]))

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    current_price_b = stock_market_data.get_most_recent_price(Company.B)

    # Company A is served first; whatever cash it leaves is available to B.
    money_to_spend = portfolio.cash
    order_list = []
    for company, action, price in ((Company.A, action_a, current_price_a),
                                   (Company.B, action_b, current_price_b)):
        if action == 0:
            # Whole shares only: floor division on float cash yields a float.
            amount_to_buy = int(money_to_spend // price)
            if amount_to_buy > 0:
                money_to_spend -= amount_to_buy * price
                order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        elif action == 1:
            amount_to_sell = portfolio.get_stock(company)
            if amount_to_sell > 0:
                order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        else:
            raise AssertionError(f"unexpected action {action} for {company}")

    if self.last_state is not None:
        diff_a = current_price_a / self.last_price_a - 1
        diff_b = current_price_b / self.last_price_b - 1
        # Only positive predictions count as future reward; everything else
        # (including NaN) is replaced by 0.
        fut_buy_a, fut_sell_a, fut_buy_b, fut_sell_b = [
            value if value > 0 else 0 for value in predictions[0][0:4]
        ]
        reward_vec = np.array([[
            diff_a + self.gamma * fut_buy_a,
            -diff_a + self.gamma * fut_sell_a,
            diff_b + self.gamma * fut_buy_b,
            -diff_b + self.gamma * fut_sell_b,
        ]])
        self.model.fit(self.last_state, reward_vec, verbose=0)

    # Remember everything the next call needs.
    self.last_state = state
    self.last_action_a = action_a
    self.last_action_b = action_b
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    self.last_price_b = current_price_b
    return order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    The state has one entry per company: the sum of both experts' votes on
    that company, each mapped through ``self.vote_map``. The model output is
    read as ``[buy_A, sell_A, buy_B, sell_B]`` and the larger value per
    company decides. The model is then fitted on the previous state: the two
    actions taken last time get the relative change of the portfolio value
    as target, the two actions not taken get its negation.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Current state: both experts vote on both stocks.
    stock_data_a = stock_market_data[Company.A]
    expert_a_on_a = self.expert_a.vote(stock_data_a)
    expert_b_on_a = self.expert_b.vote(stock_data_a)
    stock_data_b = stock_market_data[Company.B]
    expert_a_on_b = self.expert_a.vote(stock_data_b)
    expert_b_on_b = self.expert_b.vote(stock_data_b)
    state = np.array([[
        self.vote_map[expert_a_on_a] + self.vote_map[expert_b_on_a],
        self.vote_map[expert_a_on_b] + self.vote_map[expert_b_on_b],
    ]])

    predictions = self.model.predict(state)
    # 0 = buy, 1 = sell
    action_a = int(np.argmax(predictions[0][0:2]))
    action_b = int(np.argmax(predictions[0][2:4]))

    most_recent_price_a = stock_market_data.get_most_recent_price(Company.A)
    most_recent_price_b = stock_market_data.get_most_recent_price(Company.B)

    # Company A is served first; whatever cash it leaves is available to B.
    money_to_spend = portfolio.cash
    order_list = []
    for company, action, price in ((Company.A, action_a, most_recent_price_a),
                                   (Company.B, action_b, most_recent_price_b)):
        if action == 0:
            # Whole shares only: floor division on float cash yields a float.
            amount_to_buy = int(money_to_spend // price)
            if amount_to_buy > 0:
                money_to_spend -= amount_to_buy * price
                order_list.append(Order(OrderType.BUY, company, amount_to_buy))
        elif action == 1:
            amount_to_sell = portfolio.get_stock(company)
            if amount_to_sell > 0:
                order_list.append(Order(OrderType.SELL, company, amount_to_sell))
        else:
            raise AssertionError(f"unexpected action {action} for {company}")

    portfolio_value = portfolio.get_value(stock_market_data)
    if self.last_state is not None:
        # Relative change of the portfolio value since the previous call.
        diff = portfolio_value / self.last_portfolio_value - 1
        rec_vec = np.array([[-diff, -diff, -diff, -diff]])
        rec_vec[0][self.last_action_a] = diff
        rec_vec[0][2 + self.last_action_b] = diff
        self.model.fit(self.last_state, rec_vec)

    # Remember everything the next call needs.
    self.last_state = state
    self.last_action_a = action_a
    self.last_action_b = action_b
    self.last_portfolio_value = portfolio_value
    return order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Builds a ``State`` from the stock data (offset -2) and the two expert
    votes, asks the model for one value per action and picks one of five
    fixed order pairs, either at random (epsilon) or by the highest value.
    When training is enabled, the previous input is stored in memory
    together with a target vector and the model is fitted on a random sample
    of the memory.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Current state
    stock_data_a = stock_market_data[Company.A]
    stock_data_b = stock_market_data[Company.B]
    last_stock_data_a = stock_data_a.get_from_offset(-2)
    last_stock_data_b = stock_data_b.get_from_offset(-2)
    vote_a = self.expert_a.vote(stock_data_a)
    vote_b = self.expert_b.vote(stock_data_b)
    state = State(last_stock_data_a, last_stock_data_b, vote_a, vote_b)

    nn_input = np.array(
        [np.array([state.aDiff, state.vote_a, state.bDiff, state.vote_b])])
    action_vals = self.model.predict(nn_input)

    # The five selectable order pairs, indexed by the model output position.
    price_a = stock_data_a.get_last()[-1]
    price_b = stock_data_b.get_last()[-1]
    actions = [
        # 0: split the cash between A and B
        [Order(OrderType.BUY, Company.A, int((portfolio.cash / 2) // price_a)),
         Order(OrderType.BUY, Company.B, int((portfolio.cash / 2) // price_b))],
        # 1: all cash into A, sell all B
        [Order(OrderType.BUY, Company.A, int(portfolio.cash // price_a)),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        # 2: sell all A, all cash into B
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.BUY, Company.B, int(portfolio.cash // price_b))],
        # 3: sell everything
        [Order(OrderType.SELL, Company.A, portfolio.get_stock(Company.A)),
         Order(OrderType.SELL, Company.B, portfolio.get_stock(Company.B))],
        # 4: zero-share sell orders, i.e. nothing is traded
        [Order(OrderType.SELL, Company.A, 0),
         Order(OrderType.SELL, Company.B, 0)],
    ]

    # No exploration outside of training; otherwise decay towards the minimum.
    if not self.train_while_trading:
        self.epsilon = 0.0
    elif self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay
    else:
        self.epsilon = self.epsilon_min

    if random.random() < self.epsilon:
        next_action = random.randrange(self.action_size)
    else:
        next_action = np.argmax(action_vals[0])
    order_list = actions[next_action]

    portfolio_value = portfolio.get_value(
        stock_market_data, stock_market_data.get_most_recent_trade_day())

    # Identity check: State may define its own equality, so "!= None" is unsafe.
    if self.last_state is not None and self.train_while_trading:

        def reward(old_val, new_val):
            """Reward for the portfolio value moving from old_val to new_val."""
            q = new_val / old_val
            if q < 1:
                return -100.0
            if q == 1:
                return -10
            print("Q: ", q)
            return 100.0 / 2 * old_val / new_val

        # NOTE(review): the target is the prediction for the CURRENT input
        # with the slot of the previous action overwritten, but it is stored
        # against the PREVIOUS input - confirm that this is intended.
        action_vals[0][self.last_order] = reward(self.last_portfolio_value, portfolio_value)
        self.memory.append([self.last_input, action_vals])

        if len(self.memory) > self.min_size_of_memory_before_training:
            # random.sample raises ValueError if asked for more than exists.
            sample_size = min(self.batch_size, len(self.memory))
            sample = random.sample(self.memory, sample_size)
            train_inputs = np.array([entry[0][0] for entry in sample])
            train_targets = np.array([entry[1][0] for entry in sample])
            self.model.fit(train_inputs, train_targets, self.batch_size)

    # Save created state, action and portfolio value for the next call of trade()
    self.last_input = nn_input
    self.last_state = state
    self.last_order = next_action
    self.last_portfolio_value = portfolio_value
    print(next_action, action_vals, portfolio.cash,
          portfolio.get_stock(Company.A), portfolio.get_stock(Company.B))
    return order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    The state is the pair of expert votes mapped through ``self.vote_num``;
    ``self.decide_action`` turns it into one of nine actions, each a
    combination of buy/sell/hold for company A and company B. Before the
    orders are built, the previous transition (state, action, relative
    portfolio change, new state) is stored in memory and
    ``self.train_network`` is called.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    self.day += 1

    # Current state from the two expert votes.
    stock_data_a = stock_market_data[Company.A]
    stock_data_b = stock_market_data[Company.B]
    vote_a = self.expert_a.vote(stock_data_a)
    vote_b = self.expert_b.vote(stock_data_b)
    state = np.array([[self.vote_num[vote_a], self.vote_num[vote_b]]])
    action = self.decide_action(state)

    # Store experience and train only if trade() was called before.
    if self.last_state is not None:
        reward = (portfolio.get_value(stock_market_data)
                  - self.last_portfolio_value) / self.last_portfolio_value
        self.memory.append((self.last_state, self.last_action, reward, state))
        self.train_network(self.batch_size)

    # Share of the cash / of the held stocks that a buy / sell may use.
    percent_buy = 1
    percent_sell = 1

    # Action index -> (what to do with A, what to do with B).
    buy, sell, hold = "buy", "sell", "hold"
    plans = (
        (buy, buy),    # 0
        (buy, sell),   # 1
        (sell, buy),   # 2
        (sell, sell),  # 3
        (sell, hold),  # 4
        (hold, sell),  # 5
        (buy, hold),   # 6
        (hold, buy),   # 7
        (hold, hold),  # 8
    )
    # Matched with "==" so any value that compares equal to the index works.
    plan = next((steps for index, steps in enumerate(plans) if action == index), None)

    order_list = []
    if plan is None:
        print("undefined action called"+str(action))
    elif plan == (hold, hold):
        # NOTE(review): the original emits a zero-share BUY for B instead of
        # an empty list; kept as is - confirm whether that is needed.
        order_list.append(Order(OrderType.BUY, Company.B, 0))
    else:
        budget = portfolio.cash * percent_buy
        if plan == (buy, buy):
            # Buying both: each company gets half of the budget.
            budget = budget / 2
        for company, step in zip((Company.A, Company.B), plan):
            if step == buy:
                stock_price = stock_market_data.get_most_recent_price(company)
                amount = int(budget // stock_price)
                if amount > 0:
                    order_list.append(Order(OrderType.BUY, company, amount))
            elif step == sell:
                amount = int(portfolio.get_stock(company) * percent_sell)
                if amount > 0:
                    order_list.append(Order(OrderType.SELL, company, amount))

    # Decrease the epsilon for fewer random actions
    if self.epsilon > self.epsilon_min:
        self.epsilon *= self.epsilon_decay

    # Save state, action and portfolio value for the next call of trade()
    self.last_state = state
    self.last_action = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    return order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate actions to be taken on the "stock market"

    Builds the state object selected by the module-level ``State`` alias,
    records it as an experience while training, chooses one action per
    company (randomly or from the model) and translates the actions into
    orders for whole shares.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    Raises:
      RuntimeError: if ``State`` is none of the supported state classes
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Compute the current state; expert i votes on the i-th company.
    expert_votes = [
        self.experts[i].vote(stock_market_data[company])
        for i, company in enumerate(stock_market_data.get_companies())
    ]
    shares_owned = [
        portfolio.get_stock(company)
        for company in stock_market_data.get_companies()
    ]
    if State is StateExpertsOnly:
        state = StateExpertsOnly(expert_votes, portfolio.get_value(stock_market_data))
    elif State is StateExpertsCashShares:
        state = StateExpertsCashShares(expert_votes, portfolio.cash, shares_owned,
                                       portfolio.get_value(stock_market_data))
    else:
        raise RuntimeError

    if self.train_while_trading:
        # The new state completes the previous experience and opens a new one.
        if len(self.memory) > 0:
            self.memory[-1].state2 = state
        experience = Experience(state1=state)
        self.memory.append(experience)

        # Train every train_each_n_days once the memory is large enough.
        if len(self.memory) >= self.min_size_of_memory_before_training:
            if self.days_passed % self.train_each_n_days == 0:
                self.train()

        # Probability of a random action
        if not self.is_evolved_model:
            # first training episode: decays with the number of days passed
            random_action_probability = (
                (self.epsilon ** self.days_passed)
                * (1.0 - self.random_action_min_probability)
                + self.random_action_min_probability
            )
        else:
            # subsequent training episode
            random_action_probability = self.random_action_min_probability

        if self.training_occasions == 0 or random.random() < random_action_probability:
            actions = [Action.get_random(), Action.get_random()]
        else:
            # choose actions by querying the network
            prediction = self.model.predict(numpy.array([state.to_input()]))
            assert prediction.shape == (1, self.action_size)
            actions = Action.from_model_prediction(prediction[0])
        experience.actions = actions
    else:
        # not training -> always choose actions by querying the network
        actions = Action.from_model_prediction(
            self.model.predict(numpy.array([state.to_input()]))[0])

    # Translate actions into orders.
    orders: typing.List[Order] = []
    magnitudes = Action.get_action_magnitudes(actions)
    for comp, action, mag in zip(list(Company), actions, magnitudes):
        if action.is_buy():
            cash_limit = portfolio.cash * mag
            stock_price = stock_market_data[comp].get_last()[-1]
            # Whole shares only; true division produced fractional amounts.
            shares_amount = int(cash_limit // stock_price)
            if shares_amount > 0:
                orders.append(Order(OrderType.BUY, comp, shares_amount))
        elif action.is_sell():
            # Truncate to whole shares for the same reason.
            shares_amount = int(portfolio.get_stock(comp) * mag)
            if shares_amount > 0:
                orders.append(Order(OrderType.SELL, comp, shares_amount))

    self.days_passed += 1
    return orders
def choose_actions(self, stock_data_a: StockData, stock_data_b: StockData,
                   portfolio: Portfolio, order_list: List[Order],
                   epsilon=None, model_choice=None):
    """
    Pick one of ten action combinations and append its orders to ``order_list``.

    With probability ``epsilon`` a random combination is drawn, otherwise
    ``model_choice`` is used. The combinations are:

        0: buy 100% A                   5: sell 100% A, sell 100% B
        1: buy 100% A, sell 100% B      6: sell 100% A, buy 100% B
        2: buy 50% A, buy 50% B         7: sell 50% A, sell 50% B
        3: buy 100% B                   8: sell 100% B
        4: sell 100% A                  9: hold (do nothing)

    Args:
        stock_data_a: StockData of company A, its last entry gives the price
        stock_data_b: StockData of company B, its last entry gives the price
        portfolio: supplies cash and held shares
        order_list: list that receives the orders
        epsilon: probability of choosing a random combination (required)
        model_choice: combination index chosen by the model (required)

    Returns:
        Tuple ``(action_a, action_b, order_list)`` where each action is
        ``OrderType.BUY.value``, ``OrderType.SELL.value`` or 0 for "nothing".
        Both actions are None if the combination index is not in 0..9.
    """
    assert epsilon is not None
    assert model_choice is not None

    # Use the epsilon passed by the caller; the parameter used to be ignored
    # in favour of self.epsilon.
    if random.random() < epsilon:
        action_comb = random.randrange(10)
    else:
        action_comb = model_choice

    price_a = stock_data_a.get_last()[-1]
    price_b = stock_data_b.get_last()[-1]
    potential_buy_a = int(portfolio.cash // price_a)
    potential_buy_b = int(portfolio.cash // price_b)
    potential_sell_a = portfolio.get_stock(Company.A)
    potential_sell_b = portfolio.get_stock(Company.B)

    buy = OrderType.BUY.value
    sell = OrderType.SELL.value
    nothing = 0
    action_a = None
    action_b = None

    logger.debug(f"{self.get_name()}: chooses action comb {action_comb}")

    if action_comb == 0:
        # buy only A completely
        action_a, action_b = buy, nothing
        order_list.append(Order(OrderType.BUY, Company.A, potential_buy_a))
    elif action_comb == 1:
        # sell all B, buy all A
        action_a, action_b = buy, sell
        order_list.append(Order(OrderType.BUY, Company.A, potential_buy_a))
        order_list.append(Order(OrderType.SELL, Company.B, potential_sell_b))
    elif action_comb == 2:
        # buy both: half of the affordable A shares, the remaining cash goes
        # into B. This used to emit a SELL order for B.
        action_a, action_b = buy, buy
        half_of_a = potential_buy_a // 2
        order_list.append(Order(OrderType.BUY, Company.A, half_of_a))
        remaining_cash = portfolio.cash - half_of_a * price_a
        order_list.append(
            Order(OrderType.BUY, Company.B, int(remaining_cash // price_b)))
    elif action_comb == 3:
        # buy only B completely
        action_a, action_b = nothing, buy
        order_list.append(Order(OrderType.BUY, Company.B, potential_buy_b))
    elif action_comb == 4:
        # sell only A completely
        action_a, action_b = sell, nothing
        order_list.append(Order(OrderType.SELL, Company.A, potential_sell_a))
    elif action_comb == 5:
        # sell both completely
        action_a, action_b = sell, sell
        order_list.append(Order(OrderType.SELL, Company.A, potential_sell_a))
        order_list.append(Order(OrderType.SELL, Company.B, potential_sell_b))
    elif action_comb == 6:
        # sell all A, buy all B
        action_a, action_b = sell, buy
        order_list.append(Order(OrderType.SELL, Company.A, potential_sell_a))
        order_list.append(Order(OrderType.BUY, Company.B, potential_buy_b))
    elif action_comb == 7:
        # sell half of both
        action_a, action_b = sell, sell
        order_list.append(Order(OrderType.SELL, Company.A, potential_sell_a // 2))
        order_list.append(Order(OrderType.SELL, Company.B, potential_sell_b // 2))
    elif action_comb == 8:
        # sell only B completely
        action_a, action_b = nothing, sell
        order_list.append(Order(OrderType.SELL, Company.B, potential_sell_b))
    elif action_comb == 9:
        # hold
        action_a, action_b = nothing, nothing

    return action_a, action_b, order_list
def trade(self, portfolio: Portfolio, stock_market_data: StockMarketData) -> List[Order]:
    """
    Generate action to be taken on the "stock market"

    Only company A is traded. The state is the sum of both experts' votes on
    A, each mapped through ``self.vote_map``; the model output is read as
    ``[buy_A, sell_A]`` and the larger value decides. The model is then
    fitted on the previous state with the relative price change of A as the
    target for buying and its negation as the target for selling.

    Args:
      portfolio : current Portfolio of this trader
      stock_market_data : StockMarketData for evaluation
    Returns:
      A list of orders, may be empty, never None
    """
    assert portfolio is not None
    assert stock_market_data is not None
    assert stock_market_data.get_companies() == [Company.A, Company.B]

    # Current state: both experts vote on stock A.
    stock_data_a = stock_market_data[Company.A]
    vote_a_for_a = self.expert_a.vote(stock_data_a)
    vote_b_for_a = self.expert_b.vote(stock_data_a)
    state = np.array([[
        self.vote_map[vote_a_for_a] + self.vote_map[vote_b_for_a]
    ]])

    predictions = self.model.predict(state)
    # 0 = buy, 1 = sell
    action = np.argmax(predictions)

    current_price_a = stock_market_data.get_most_recent_price(Company.A)
    order_list = []
    if action == 0:
        # Whole shares only: floor division on float cash yields a float.
        amount_to_buy = int(portfolio.cash // current_price_a)
        if amount_to_buy > 0:
            order_list.append(Order(OrderType.BUY, Company.A, amount_to_buy))
    elif action == 1:
        amount_to_sell = portfolio.get_stock(Company.A)
        if amount_to_sell > 0:
            order_list.append(Order(OrderType.SELL, Company.A, amount_to_sell))
    else:
        raise AssertionError(f"unexpected action {action}")

    if self.last_state is not None:
        # The target does not depend on which action was taken last time
        # (both original branches built the same vector); the previous
        # action is only validated.
        if self.last_action_a != 0 and self.last_action_a != 1:
            raise AssertionError(f"unexpected last action {self.last_action_a}")
        diff_a = current_price_a / self.last_price_a - 1
        rec_vec = np.array([[diff_a, -diff_a]])
        self.model.fit(self.last_state, rec_vec)

    # Remember everything the next call needs.
    self.last_state = state
    self.last_action_a = action
    self.last_portfolio_value = portfolio.get_value(stock_market_data)
    self.last_price_a = current_price_a
    return order_list