def savegame(config):
    # Step 1: init Game (game_num 1 is the main game, 2 is for evaluation)
    env = Environment(config.game_num)
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = DQN(config)

    # id -> symbol for this game's vocabulary
    dic = {}
    with open("symbolMapping" + str(sys.argv[1]) + ".txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic[int(splitdata[1])] = splitdata[0]
    dic[0] = "NULL"

    # dump the teacher's embedding for every symbol: one line with the word,
    # then one line of space-separated vector components
    with open("teacher" + str(sys.argv[1]) + "_embeddings.txt", "w") as fp:
        for i in range(config.vocab_size - 1):
            state = np.zeros([config.batch_size, config.seq_length])
            state[:, 0] = i
            embedding = brain.output_embedT.eval(
                feed_dict={brain.stateInputT: state},
                session=brain.session)[0, 0, :]
            print >> fp, dic[i]
            for element in embedding:
                print >> fp, element,
            print >> fp
    brain.session.close()

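
# A minimal sketch (assumed, not part of the original file) of reading the
# embeddings file written above, given its alternating layout: one line with
# the word, then one line of space-separated vector components.
# "load_teacher_embeddings" is a hypothetical helper name.
import numpy as np

def load_teacher_embeddings(path):
    embeddings = {}
    with open(path, 'r') as fp:
        lines = [line.rstrip('\n') for line in fp if line.strip()]
    for i in range(0, len(lines) - 1, 2):
        embeddings[lines[i]] = np.array([float(x) for x in lines[i + 1].split()])
    return embeddings
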
def main(train):
    LOGGER.info('=== main started ===')
    environment = Environment(ticker,
                              from_date=datetime(2004, 1, 1),
                              to_date=datetime(2010, 1, 1))
    agent = Agent(environment.state_size(), environment.action_size(),
                  epochs=epochs)
    environment.set_agent(agent)

    if train:
        for i in range(epochs):
            environment.reset()
            environment.run()
            agent.decrease_epsilon()
            LOGGER.info('#### {}/{} game finished ####\nBalance: {}'.format(
                str(i + 1), epochs, environment.cerebro.broker.get_value()))
        agent.save(environment.ticker + '.h5')
    else:
        agent.load(environment.ticker + '.h5')

    # Test on!
    test_environment = Environment(ticker,
                                   from_date=datetime(2010, 1, 1),
                                   to_date=datetime(2013, 1, 1),
                                   scaler=environment.scaler)
    test_environment.set_agent(agent)
    test_environment.reset()
    test_environment.run()
    LOGGER.info('Backtest balance: {}'.format(
        test_environment.cerebro.broker.get_value()))

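
# The Agent class is defined elsewhere; this is a minimal, hypothetical
# sketch of the epsilon-greedy schedule the training loop above relies on
# (decrease_epsilon once per game), assuming multiplicative decay to a floor.
import random

class EpsilonSchedule(object):
    def __init__(self, epsilon=1.0, epsilon_min=0.05, epsilon_decay=0.995):
        self.epsilon = epsilon
        self.epsilon_min = epsilon_min
        self.epsilon_decay = epsilon_decay

    def should_explore(self):
        # True when the agent should pick a random action this step.
        return random.random() < self.epsilon

    def decrease_epsilon(self):
        # Called once per epoch in main(): decay exploration towards the floor.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)
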
def savegame(config):
    # Step 1: init Game (game_num 1 is the main game, 2 is for evaluation)
    env = Environment(config.game_num)
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = DQN(config)

    # id -> symbol for this game's local vocabulary
    dic = {}
    with open("symbolMapping" + str(sys.argv[1]) + ".txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic[int(splitdata[1])] = splitdata[0]
    dic[0] = "NULL"

    # symbol -> id in the shared global vocabulary
    dic_trans = {}
    with open("symbolMapping1236.txt", 'r') as fp:
        data = fp.read().split('\n')
        for i in range(len(data) - 1):
            splitdata = data[i].split(' ')
            dic_trans[splitdata[0]] = int(splitdata[1])
    dic_trans["NULL"] = 0

    # first initialize every global symbol's embedding randomly ...
    dic_embedding = {}
    sess = tf.InteractiveSession()
    stateInput = tf.placeholder(tf.int32, [len(dic_trans.keys())])
    embed = tf.Variable(tf.random_uniform([len(dic_trans.keys()), 20], -1, 1),
                        name="embed")
    word_embeds = tf.nn.embedding_lookup(embed, stateInput)
    tf.initialize_all_variables().run()
    state = sorted(dic_trans.values())
    state_map = word_embeds.eval(feed_dict={stateInput: state})
    for i in range(len(state)):
        dic_embedding[state[i]] = state_map[i]
    sess.close()

    # ... then overwrite the entries this teacher actually learned
    for i in range(config.vocab_size - 1):
        state = np.zeros([config.batch_size, config.seq_length])
        state[:, 0] = i
        embedding = brain.word_embeds.eval(
            feed_dict={brain.stateInput: state},
            session=brain.session)[0, 0]
        dic_embedding[dic_trans[dic[i]]] = embedding
    brain.session.close()

    with open("embedTeacher" + str(sys.argv[1]) + ".p", "wb") as fp:
        cpickle.dump(dic_embedding, fp)

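
# Sketch (assumed, not from the original code) of loading the pickled table
# back; cpickle is taken to be cPickle imported under that name, as the dump
# call above implies. The result maps a global symbol id to its
# 20-dimensional embedding vector.
import cPickle as cpickle

def load_teacher_table(path):
    with open(path, "rb") as fp:
        return cpickle.load(fp)
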
def main(train, action_bias=0):
    environment = Environment(tickers,
                              initial_deposit=100000,
                              from_date=datetime(2004, 1, 1),
                              to_date=datetime(2010, 1, 1),
                              min_days_to_hold=min_days_to_hold,
                              max_days_to_hold=max_days_to_hold)
    agent = Agent(environment.state_size(), environment.action_size(),
                  epochs=epochs,
                  gamma=0.2,
                  replay_buffer=64,
                  memory_queue_length=32)

    if train:
        for i in range(epochs):
            state = environment.reset()
            done = False
            while not done:
                action = agent.act(state)
                next_state, reward, done = environment.step(action)
                agent.remember(state, action, reward, next_state, done)
                state = next_state
            agent.decrease_epsilon()
            LOGGER.info('Balance for current game: %d', environment.deposit)
            pprint(environment.actions)
        agent.save(environment.main_ticker + '.h5')
    else:
        agent.load(environment.main_ticker + '.h5')

    # Test on!
    test_environment = Environment(tickers,
                                   initial_deposit=100000,
                                   from_date=datetime(2010, 1, 1),
                                   to_date=datetime(2013, 1, 1),
                                   min_days_to_hold=min_days_to_hold,
                                   max_days_to_hold=max_days_to_hold,
                                   scaler=environment.scaler)
    state = test_environment.reset()
    done = False
    while not done:
        action = agent.act(state, False, action_bias)
        next_state, _, done = test_environment.step(action)
        state = next_state
    print_results_on_test_environment(test_environment)
    export_to_file(test_environment.actions)

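
# Hypothetical sketch of the replay machinery behind remember(): a bounded
# memory of transitions plus the one-step Q-learning target
# r + gamma * max_a' Q(s', a') that the real Agent presumably regresses
# towards. The deque-based memory and all names here are assumptions.
import random
from collections import deque

import numpy as np

class ReplaySketch(object):
    def __init__(self, gamma=0.2, memory_queue_length=32):
        self.gamma = gamma
        self.memory = deque(maxlen=memory_queue_length)

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def sample(self, replay_buffer=64):
        return random.sample(self.memory, min(replay_buffer, len(self.memory)))

    def target(self, reward, done, q_next):
        # q_next: the network's Q-values for the transition's next state.
        return reward if done else reward + self.gamma * np.max(q_next)
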
def main():
    config = Config()
    env = Environment(config)            # for training
    eval_env = Eval_Environment(config)  # for testing
    num_actions = env.action_size()
    config.setaction_set_size(num_actions)
    brain = Control(config)
    plt = Plotter()
    plt.writesummary(0)

    # adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    episode_buffer = Buffer(config)
    episode_length = 0
    eval_count = 1

    while env.frame_history <= config.MAX_FRAMES:
        # testing happens now
        if env.frame_history / (config.EVAL_FREQ * eval_count) == 1:
            evaluate(eval_env, config, brain, env.frame_history, plt)
            eval_count += 1
        past_num_frames = env.frame_history

        # algorithm begins now
        if episode_length == 0:
            env.reset()
            s, a, r, t = env.act(0)
            episode_buffer.add(s, a, r)
            episode_length += 1
        s, a, r, t = env.act(brain.getaction(s))
        episode_length += 1
        episode_buffer.add(s, a, r)

        # episode ends: turn the buffer into returns and update the table
        if (env.START_NEW_GAME or episode_length >= config.T) \
                and not episode_buffer.isempty():
            episode_values = episode_buffer.get_returns()
            brain.update_table(episode_values)
            episode_buffer.reset()
            episode_length = 0
        pbar.update(env.frame_history - past_num_frames)
    env.close_render()

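
# A minimal sketch of what the episode buffer's get_returns() plausibly
# computes: the discounted Monte Carlo return G_t = r_t + gamma * G_{t+1}
# for every step of the episode. The real Buffer is defined elsewhere, and
# gamma being a config field is an assumption.
def discounted_returns(rewards, gamma=0.99):
    returns = [0.0] * len(rewards)
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns
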
def savegame(config):
    # local id string -> symbol for this game's vocabulary
    with open('symbolMapping' + str(sys.argv[1]) + '.txt', 'r') as fp:
        data = fp.read().split('\n')
        spd = [data_.split(' ')[::-1] for data_ in data]
        dic_local = dict(spd[0:-1])
    dic_local['0'] = 'NULL'

    # symbol -> id string in the shared global vocabulary
    with open('symbolMapping5.txt', 'r') as fp:
        data = fp.read().split('\n')
        spd = [data_.split(' ') for data_ in data]
        dic_global = dict(spd[0:-1])
    dic_global['NULL'] = '0'

    # Step 1: init Game (game_num 1 is the main game, 2 is for evaluation)
    env = Environment(config.game_num)
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = DQN(config)

    # adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    episode_length = 0
    num_episodes = 0
    total_reward = 0
    MAX_STEPS = 105000
    totalSteps = 0
    MEM_STEPS = 10000
    memory = []

    while totalSteps < MAX_STEPS:
        totalSteps += 1
        if env.START_NEW_GAME:
            episode_length = 0
            env.START_NEW_GAME = False
            state, reward, terminal, availableObjects = env.newGame()
            brain.history.add(state)
        action_indicator = np.zeros(actions)
        object_indicator = np.zeros(objects)

        # predict
        action_index, object_index = brain.getAction(availableObjects, True)
        Qactions, Qobjects = brain.getQValues(availableObjects)
        action_indicator[action_index] = 1
        object_indicator[object_index] = 1
        memory.append((convert_state(state, dic_local, dic_global),
                       Qactions, Qobjects))

        # act
        nextstate, reward, terminal, availableObjects = env.step(action_index,
                                                                 object_index)
        total_reward += reward
        episode_length += 1

        # observe
        brain.setPerception(state, reward, action_indicator, object_indicator,
                            nextstate, terminal, True)
        state = nextstate

        # flush the collected records every MEM_STEPS steps: three lines per
        # record (global state ids, action Q-values, object Q-values)
        if totalSteps % MEM_STEPS == 0:
            fileName = str(config.game_num) + "_mem.txt"
            with open(fileName, "a") as fp:
                for i in range(len(memory)):
                    for j in memory[i][0]:
                        print >> fp, j,
                    print >> fp
                    for j in memory[i][1]:
                        print >> fp, j,
                    print >> fp
                    for j in memory[i][2]:
                        print >> fp, j,
                    print >> fp
            memory = []

        if terminal or (episode_length % config.max_episode_length == 0):
            num_episodes += 1
            with open("saver_reward.txt", "a") as fp:
                print >> fp, (total_reward / (num_episodes * 1.0))
            env.START_NEW_GAME = True

        pbar.update(1)
        if brain.timeStep > config.MAX_FRAMES:
            brain.train_writer.close()
            break
    brain.session.close()

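
# convert_state is not defined in this file; a plausible sketch, given how
# dic_local (local id string -> symbol) and dic_global (symbol -> global id
# string) are built above: translate each local symbol id in the state to
# its id in the shared global vocabulary.
def convert_state(state, dic_local, dic_global):
    return [int(dic_global[dic_local[str(int(s))]]) for s in state]
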
def playgame(config):
    # Step 1: init Game (game_num 1 is the main game, 2 is for evaluation)
    env = Environment(config.game_num)
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = DQN(config)

    # adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')
    episode_length = 0
    num_episodes = 0
    total_reward = 0

    while True:
        if env.START_NEW_GAME:
            episode_length = 0
            env.START_NEW_GAME = False
            state, reward, terminal, availableObjects = env.newGame()
            brain.history.add(state)
        action_indicator = np.zeros(actions)
        object_indicator = np.zeros(objects)

        # predict
        action_index, object_index = brain.getAction(availableObjects)
        action_indicator[action_index] = 1
        object_indicator[object_index] = 1

        # act
        nextstate, reward, terminal, availableObjects = env.step(action_index,
                                                                 object_index)
        total_reward += reward
        episode_length += 1

        # observe
        brain.setPerception(state, reward, action_indicator, object_indicator,
                            nextstate, terminal, False)
        state = nextstate

        if terminal or (episode_length % config.max_episode_length == 0):
            num_episodes += 1
            with open("train_reward.txt", "a") as fp:
                print >> fp, (total_reward / (num_episodes * 1.0))
            env.START_NEW_GAME = True

        #####################################################################
        # for evaluating qvalues
        if (brain.timeStep % config.EVAL == 0) and (brain.timeStep != 0):
            if brain.timeStep / config.EVAL == 1:
                if not (os.path.exists("checkStates.txt")
                        and os.path.getsize("checkStates.txt") > 0):
                    assert config.SAMPLE_STATES % config.BATCH_SIZE == 0
                    assert config.SAMPLE_STATES < brain.memory.count
                    checkStates, _1, _2, _3, _4, _5 = brain.memory.sample()
                    with open("checkStates.txt", "w") as fp:
                        cpickle.dump(checkStates, fp)
                else:
                    with open("checkStates.txt", 'r') as fp:
                        checkStates = cpickle.load(fp)

            evalQValues_a = brain.action_valueT.eval(
                feed_dict={brain.stateInputT: checkStates},
                session=brain.session)
            maxEvalQValues_a = np.max(evalQValues_a, axis=1)
            avgEvalQValues_a = np.mean(maxEvalQValues_a)
            with open("evalQValue_a.txt", "a") as fp:
                print >> fp, avgEvalQValues_a

            evalQValues_o = brain.object_valueT.eval(
                feed_dict={brain.stateInputT: checkStates},
                session=brain.session)
            maxEvalQValues_o = np.max(evalQValues_o, axis=1)
            avgEvalQValues_o = np.mean(maxEvalQValues_o)
            with open("evalQValue_o.txt", "a") as fp:
                print >> fp, avgEvalQValues_o

            #####################################################################
            # save current history before starting evaluation
            # temp_history_data = brain.history.copy()
            # now let us evaluate avg reward
            # create alternate environment for EVALUATION
            # env_eval = Environment(2)
            env_eval = env
            if config.TUTORIAL_WORLD:
                total_reward, nrewards, nepisodes, quest1_reward_cnt, \
                    quest2_reward_cnt, quest3_reward_cnt = evaluate(
                        brain, env_eval, config)
            else:
                total_reward, nrewards, nepisodes, quest1_reward_cnt = evaluate(
                    brain, env_eval, config)
            with open("test_reward.txt", "a") as fp:
                print >> fp, total_reward

            # setting the best network
            if len(env_eval.reward_history) == 0 or total_reward > max(
                    env_eval.reward_history):
                # save best network
                if not os.path.exists(os.getcwd() + '/Savednetworks'):
                    os.makedirs(os.getcwd() + '/Savednetworks')
                brain.saver.save(brain.session,
                                 os.getcwd() + '/Savednetworks/' + 'network' + '-dqn',
                                 global_step=brain.timeStep)
            # doing this for keeping track of the best network
            env_eval.reward_history.append(total_reward)

            # go back to saved frame after evaluation completed
            # brain.history.add(temp_history_data)

            #####################################################################
            if config.TUTORIAL_WORLD:
                brain.inject_summary(
                    {
                        'average.q_a': avgEvalQValues_a,
                        'average.q_o': avgEvalQValues_o,
                        'average.q': (0.5 * avgEvalQValues_o + 0.5 * avgEvalQValues_a),
                        'average_reward': total_reward,
                        'average_num_pos_reward': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt,
                        'quest2_average_reward_cnt': quest2_reward_cnt,
                        'quest3_average_reward_cnt': quest3_reward_cnt
                    }, brain.timeStep)
            else:
                brain.inject_summary(
                    {
                        'average.q_a': avgEvalQValues_a,
                        'average.q_o': avgEvalQValues_o,
                        'average.q': (0.5 * avgEvalQValues_o + 0.5 * avgEvalQValues_a),
                        'average_reward': total_reward,
                        'average_numrewards': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt
                    }, brain.timeStep)
        #####################################################################

        pbar.update(1)
        if brain.timeStep > config.MAX_FRAMES:
            brain.train_writer.close()
            break
    brain.session.close()

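
# Sketch (assumed, not in the original file) of restoring the best network
# saved into Savednetworks above, using the standard tf.train checkpoint
# helpers of this TensorFlow era.
import os
import tensorflow as tf

def restore_best_network(brain):
    checkpoint = tf.train.get_checkpoint_state(os.getcwd() + '/Savednetworks')
    if checkpoint and checkpoint.model_checkpoint_path:
        brain.saver.restore(brain.session, checkpoint.model_checkpoint_path)
        return True
    return False
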
def learnstudent(config):
    # Step 1: init Game (game_num 1 is the main game, 2 is for evaluation)
    env = Environment(config.game_num)
    ###################
    # Step 2: init DQN
    actions = env.action_size()
    objects = env.object_size()
    config.setnumactions(actions)
    config.setnumobjects(objects)
    config.setvocabsize(env.vocab_size())
    brain = student(config)

    # load the teachers' recorded (state, Q-values) memories, one per game
    brain.data[1] = reader('1_mem.txt')
    brain.data[2] = reader('2_mem.txt')
    brain.data[3] = reader('3_mem.txt')

    # adding progress bar for training
    pbar = tqdm(total=config.MAX_FRAMES, desc='Training Progress')

    while True:
        # one distillation step per teacher game
        for game in range(1, 4):
            brain.train(game)
        brain.timeStep += 1

        #####################################################################
        # periodically save the student network and evaluate it
        if (brain.timeStep % 100) == 0 and (brain.timeStep != 0):
            env_eval = env
            # save last network
            if not os.path.exists(os.getcwd() + '/StudentSavednetworks'):
                os.makedirs(os.getcwd() + '/StudentSavednetworks')
            brain.saver.save(brain.session,
                             os.getcwd() + '/StudentSavednetworks/' + 'network' + '-student',
                             global_step=brain.timeStep)

            if config.TUTORIAL_WORLD:
                total_reward, nrewards, nepisodes, quest1_reward_cnt, \
                    quest2_reward_cnt, quest3_reward_cnt = evaluate(
                        brain, env_eval, config)
            else:
                total_reward, nrewards, nepisodes, quest1_reward_cnt = evaluate(
                    brain, env_eval, config)

            #####################################################################
            if config.TUTORIAL_WORLD:
                brain.inject_summary(
                    {
                        'average_reward': total_reward,
                        'average_num_pos_reward': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt,
                        'quest2_average_reward_cnt': quest2_reward_cnt,
                        'quest3_average_reward_cnt': quest3_reward_cnt
                    }, brain.timeStep)
            else:
                brain.inject_summary(
                    {
                        'average_reward': total_reward,
                        'average_numrewards': nrewards,
                        'number_of_episodes': nepisodes,
                        'quest1_average_reward_cnt': quest1_reward_cnt
                    }, brain.timeStep)
        #####################################################################

        pbar.update(1)
        if brain.timeStep > config.MAX_FRAMES:
            brain.train_writer.close()
            break
    brain.session.close()

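
# Hypothetical sketch of reader() (defined elsewhere): parse the
# "<game_num>_mem.txt" files written by savegame above, where each record is
# three whitespace-separated lines -- global state ids, action Q-values,
# object Q-values.
import numpy as np

def reader(path):
    records = []
    with open(path, 'r') as fp:
        lines = [line.split() for line in fp if line.strip()]
    for i in range(0, len(lines) - 2, 3):
        state = np.array(lines[i], dtype=np.int32)
        q_actions = np.array(lines[i + 1], dtype=np.float32)
        q_objects = np.array(lines[i + 2], dtype=np.float32)
        records.append((state, q_actions, q_objects))
    return records
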