import os
import time
from collections import Counter

from keras.models import load_model

# Project-local modules (import paths assumed; adjust to your repo layout)
from agent import Agent
from market import Market
from utils import plot_action_profit


def main():
    stock_name = "GSPC_2011-03"
    model_name = "model_ep10"

    # Load the trained model and infer the window size from its input layer
    model = load_model("models/" + model_name)
    window_size = model.layers[0].input.shape.as_list()[1]

    agent = Agent(window_size, True, model_name)
    market = Market(window_size, stock_name)

    state, price_data = market.reset()

    for t in range(market.last_data_index):
        action, bought_price = agent.act(state, price_data)

        # Check the action to get the reward and observe the next state
        next_state, next_price_data, reward, done = market.get_next_state_reward(
            action, bought_price)

        state = next_state
        price_data = next_price_data

        if done:
            print("----------------------------")
            print("{0} Total profit: {1}".format(stock_name,
                                                 agent.get_total_profit()))
            print("----------------------------")

    plot_action_profit(market.data, agent.action_history,
                       agent.get_total_profit())
def main():
    stock_name = "GSPC_2011-03"
    model_name = "model_ep10"

    model = load_model("models/" + model_name)
    window_size = model.layers[0].input.shape.as_list()[1]

    agent = Agent(window_size, True, model_name)
    market = Market(window_size, stock_name)

    state, price_data = market.reset()  # ToDo: Start from an initial state

    for t in range(market.last_data_index):
        action, bought_price = agent.act(
            state, price_data)  # ToDo: Get action for the current state

        # Check the action to get reward and observe next state
        next_state, next_price_data, reward, done = market.get_next_state_reward(
            action, bought_price)  # ToDo: get next state

        state = next_state
        price_data = next_price_data

        if done:
            print("--------------------------------")
            print("{0} Total Profit: {1}".format(stock_name,
                                                 agent.get_total_profit()))
            print("--------------------------------")

    plot_action_profit(market.data, agent.action_history,
                       agent.get_total_profit())
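# A minimal sketch of what Agent.act() is assumed to do in evaluation mode
# (the `True` flag passed to Agent above): pick the greedy action from the
# model's Q-values and track the bought price so a later sell can realize a
# profit. The action encoding (0 = hold, 1 = buy, 2 = sell), the inventory
# list, and the helper name are illustrative assumptions, not the
# repository's confirmed implementation.
import numpy as np

def act_greedy_sketch(model, state, price, inventory):
    q_values = model.predict(state, verbose=0)   # state shape: (1, window_size)
    action = int(np.argmax(q_values[0]))         # greedy: no exploration in eval
    bought_price = None
    if action == 1:                              # buy: remember the price paid
        inventory.append(price)
    elif action == 2 and inventory:              # sell: pop the earliest buy
        bought_price = inventory.pop(0)
    return action, bought_price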
def main():
    window_size = 5
    episode_count = 10
    stock_name = "GSPC_10"
    batch_size = 3

    agent = Agent(window_size)
    market = Market(window_size=window_size, stock_name=stock_name)

    start_time = time.time()
    for e in range(episode_count + 1):
        print("Episode {0}/{1}".format(e, episode_count))
        agent.reset()
        state, price_data = market.reset()

        for t in range(market.last_data_index):
            action, bought_price = agent.act(state, price_data)
            next_state, next_price_data, reward, done = market.get_next_state_reward(
                action, bought_price)

            agent.memory.append([state, action, reward, next_state, done])
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("----------------------")
                print("Total Profit: {0}".format(agent.get_total_profit()))
                print("----------------------")

        if e % 10 == 0:
            if not os.path.exists("models"):
                os.mkdir("models")
            agent.model.save("models/model_ep" + str(e))

    end_time = time.time()
    training_time = end_time - start_time
    print("Training time: {0} seconds".format(training_time))
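# A minimal sketch of the experience_replay() step the training loops rely on,
# assuming a standard DQN update: sample a minibatch from memory, build the
# Bellman target r + gamma * max_a' Q(s', a') for non-terminal transitions,
# and fit the network on the corrected targets. The gamma value and function
# name are illustrative assumptions, not the repo's exact code.
import random
import numpy as np

def experience_replay_sketch(model, memory, batch_size, gamma=0.95):
    batch = random.sample(list(memory), batch_size)
    for state, action, reward, next_state, done in batch:
        target = reward
        if not done:
            # Bootstrap from the best predicted Q-value of the next state
            target = reward + gamma * np.amax(
                model.predict(next_state, verbose=0)[0])
        target_q = model.predict(state, verbose=0)
        target_q[0][action] = target             # update only the taken action
        model.fit(state, target_q, epochs=1, verbose=0)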
def main():
    stock_name = "GSPC_2011-03"
    model_name = "model_ep30"
    window_size = 5

    agent = Agent(window_size, True, model_name)
    market = Market(window_size, stock_name)

    state, price_data = market.reset()  # Start from an initial state

    for t in range(market.last_data_index):
        action, bought_price = agent.act(
            state, price_data)  # Get action for the current state

        # Check the action to get reward and observe next state
        next_state, next_price_data, reward, done = market.get_next_state_reward(
            action, bought_price)

        state = next_state
        price_data = next_price_data

        if done:
            print("--------------------------------")
            print("{0} Total Profit: {1}".format(stock_name,
                                                 agent.get_total_profit()))
            print("--------------------------------")

    # toDo: change data
    plot_action_profit(market.data["Close"].values, agent.action_history,
                       agent.get_total_profit())
def main_eval():
    stock_name = "BABA"
    model_name = "model_ep0"

    model = load_model("models/" + model_name)
    window_size = model.layers[0].input.shape.as_list()[1]

    agent = Agent(window_size, True, model_name)
    market = Market(window_size, stock_name)

    state, price_data, date_data = market.reset()

    for t in range(market.last_data_index):
        action, bought_price = agent.act(state, price_data, date_data)
        next_state, next_price_data, next_date_data, reward, done = \
            market.get_next_state_reward(action, bought_price)

        state = next_state
        price_data = next_price_data
        date_data = next_date_data

        if done:
            print("--------------------")
            print("{0} Total profit: {1}".format(stock_name,
                                                 agent.get_total_profit()))
            print("--------------------")

    plot_action_profit(market.data, agent.action_history,
                       agent.get_total_profit())
    return agent.book, agent.initial_investment, agent.dates
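# A possible implementation of the plot_action_profit() helper used by the
# evaluation scripts, sketched with matplotlib: draw the price series, mark
# buys and sells from the agent's action history, and put the total profit in
# the title. The action encoding (1 = buy, 2 = sell) and the function name
# are assumptions.
import matplotlib.pyplot as plt

def plot_action_profit_sketch(data, action_history, profit):
    plt.figure(figsize=(12, 6))
    plt.plot(data, label="Close price")
    buys = [i for i, a in enumerate(action_history) if a == 1]
    sells = [i for i, a in enumerate(action_history) if a == 2]
    plt.scatter(buys, [data[i] for i in buys], marker="^", c="g", label="Buy")
    plt.scatter(sells, [data[i] for i in sells], marker="v", c="r", label="Sell")
    plt.title("Total profit: {0:.2f}".format(profit))
    plt.legend()
    plt.show()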
def main():
    window_size = 5
    episode_count = 2
    stock_name = "GSPC_2011"
    batch_size = 32

    profit_for_episode = []
    total_action_history = []

    agent = Agent(window_size)
    market = Market(window_size, stock_name)

    start_time = time.time()
    for e in range(1, episode_count + 1):
        print("Episode {}/{}".format(e, episode_count))
        agent.reset()
        state, price_data = market.reset()

        for t in range(market.last_data_index):
            action, bought_price = agent.act(state, price_data)
            next_state, next_price_data, reward, done = \
                market.get_next_state_reward(action, bought_price)

            agent.memory.append((state, action, reward, next_state, done))
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("--------------------------------")
                print("Total profit: {}".format(agent.get_total_profit()))
                print("Action history:")
                print(Counter(agent.action_history).keys())
                print(Counter(agent.action_history).values())
                total_action_history.append(agent.action_history)
                print("--------------------------------")

        profit_for_episode.append(agent.get_total_profit())

        if e % 10 == 0:
            if not os.path.exists("models"):
                os.mkdir("models")
            print(str(e))
            agent.model.save("models/model_ep{}.h5".format(str(e)))

    end_time = time.time()
    training_time = end_time - start_time
    print("Training took {:.2f} seconds.".format(training_time))
    print("profit_for_episode =", profit_for_episode)
    print("Total action history:")
    for history in total_action_history:
        print(Counter(history).keys())
        print(Counter(history).values())
def main():
    window_size = 5
    episode_count = 10
    stock_name = "^GSPC_2011"
    batch_size = 32

    agent = Agent(window_size)
    market = Market(window_size=window_size, stock_name=stock_name)

    start_time = time.time()
    for e in range(episode_count + 1):
        print("Episode " + str(e) + "/" + str(episode_count))
        agent.reset()
        state, price_data = market.reset()  # ToDo: get the initial state

        for t in range(market.last_data_index):
            # Get the action of the agent
            action, bought_price = agent.act(
                state, price_data
            )  # ToDo: Call the act() method of the agent considering the current state

            # Get the next state of the stock
            # ToDo: Get the next available state from market data
            next_state, next_price_data, reward, done = market.get_next_state_reward(
                action, bought_price)

            # ToDo: add the transaction to the memory
            agent.memory.append((state, action, reward, next_state, done))

            # Learn from the history
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("--------------------------------")
                print("Total Profit: {0}".format(agent.get_total_profit()))
                print("--------------------------------")

        if e % 10 == 0:
            if not os.path.exists("models"):
                os.mkdir("models")
            agent.model.save("models/model_ep" + str(e))

    end_time = time.time()
    training_time = round(end_time - start_time)
    print("Training took {0} seconds.".format(training_time))
def main():
    window_size = 5
    episode_count = 10
    stock_name = 'GSPC_2011'
    batch_size = 32

    agent = Agent(window_size)
    market = Market(window_size=window_size, stock_name=stock_name)

    start_time = time.time()
    for e in range(episode_count + 1):
        print("Episode {}/{}".format(e, episode_count))
        agent.reset()
        state, price_data = market.reset()  # Get the initial state

        for t in range(market.last_data_index):
            # Get the action of the agent
            action, bought_price = agent.act(state, price_data)

            # Get the next available state from market data
            next_state, next_price_data, reward, done = market.get_next_state_reward(
                action, bought_price)

            # Add the transaction to the memory
            agent.memory.append((state, action, reward, next_state, done))

            # Learn from the history
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("--------------------------")
                print("Total Profit: {}".format(agent.get_total_profit()))
                print("--------------------------")

        if e % 10 == 0:
            if not os.path.exists("models"):
                os.mkdir("models")  # Make the folder if it does not exist
            agent.model.save("models/model_ep" + str(e))

    end_time = time.time()
    training_time = end_time - start_time
    print("Training time = {} seconds".format(training_time))
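# A minimal sketch of the Market.get_next_state_reward() contract the loops
# above assume: advance one time step, compute the reward (here, the realized
# profit on a sell, floored at zero -- a common choice in these tutorials,
# assumed rather than confirmed), and report whether the data is exhausted.
# The attribute names (t, close_prices) and get_state() are illustrative
# assumptions.
def get_next_state_reward_sketch(market, action, bought_price):
    market.t += 1
    done = market.t >= market.last_data_index
    next_price = market.close_prices[market.t]
    reward = 0
    if action == 2 and bought_price is not None:    # sell: reward = profit
        reward = max(next_price - bought_price, 0)
    next_state = market.get_state(market.t)         # window of recent changes
    return next_state, next_price, reward, done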
def main_train():
    # State: the last 10 changes of the stock price
    window_size = 10
    # Number of training episodes
    episode_count = 100
    stock_name = "BABA"
    batch_size = 32

    agent = Agent(window_size)
    market = Market(window_size=window_size, stock_name=stock_name)

    start_time = time.time()
    for e in range(episode_count + 1):
        print("Episode {0}/{1}.".format(e, episode_count))
        agent.reset()
        state, price_data = market.reset()

        for t in range(market.last_data_index):
            action, bought_price = agent.act(state, price_data)
            next_state, next_price_data, reward, done = market.get_next_state_reward(
                action, bought_price)

            agent.memory.append((state, action, reward, next_state, done))
            if len(agent.memory) > batch_size:
                agent.experience_replay(batch_size)

            state = next_state
            price_data = next_price_data

            if done:
                print("--------------------")
                print("Total profit: {0}".format(agent.get_total_profit()))
                print("--------------------")

        if e % 10 == 0:
            if not os.path.exists("models"):
                os.mkdir("models")
            agent.model.save("models/model_ep" + str(e))

    end_time = time.time()
    training_time = end_time - start_time
    print("Training took {0} seconds.".format(training_time))
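# A sketch of the kind of Q-network an Agent like the ones above might wrap:
# a small fully connected Keras model mapping the window of recent price
# changes to one Q-value per action (hold / buy / sell). Layer sizes and the
# optimizer are illustrative assumptions, not the repository's confirmed
# architecture.
from keras.models import Sequential
from keras.layers import Dense

def build_q_network_sketch(window_size, action_size=3):
    model = Sequential()
    model.add(Dense(64, input_dim=window_size, activation="relu"))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(8, activation="relu"))
    model.add(Dense(action_size, activation="linear"))  # one Q-value per action
    model.compile(loss="mse", optimizer="adam")
    return model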