def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0

    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params,
                 lock, count)

        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())

            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id,
                                                  Agent, task, best_rate,
                                                  params, save_model_index,
                                                  start_time, logging,
                                                  house_id)
def main():
    params = Params()
    mp.set_start_method('spawn')
    count = mp.Value('i', 0)
    best_acc = mp.Value('d', 0.0)
    lock = mp.Lock()

    shared_model = A3C_LSTM_GA()
    shared_model = shared_model.share_memory()

    shared_optimizer = SharedAdam(shared_model.parameters(),
                                  lr=params.lr,
                                  amsgrad=params.amsgrad,
                                  weight_decay=params.weight_decay)
    shared_optimizer.share_memory()

    # Single-process debugging entry points
    # run_sim(0, params, shared_model, None, count, lock)
    # test(params, shared_model, count, lock, best_acc)

    processes = []
    train_process = 0
    test_process = 0

    # Spawn the test workers
    for rank in range(params.n_process):
        p = mp.Process(target=test,
                       args=(test_process, params, shared_model, count, lock,
                             best_acc))
        p.start()
        processes.append(p)
        test_process += 1

    # Spawn the training workers
    for i in range(2):
        p = mp.Process(target=run_sim,
                       args=(train_process, params, shared_model,
                             shared_optimizer, count, lock))
        p.start()
        processes.append(p)
        train_process += 1

    for p in processes:
        p.join()
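# Hedged addition (not in the original source): because main() selects the
# 'spawn' start method, child processes re-import this module, so the script
# is assumed to be launched through a __main__ guard like the one below.
if __name__ == '__main__':
    main()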
def __init__(self):
    args = parser.parse_args()
    args.input_size = len(word_to_idx)

    shared_model = A3C_LSTM_GA(args)
    shared_model = shared_model.cuda()

    # Load the model
    if args.load != "0":
        shared_model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))
    shared_model.share_memory()

    self.test_model = test(args, shared_model)
def __init__(self, args, share_model):
    torch.manual_seed(args.seed + 0)

    self.model = A3C_LSTM_GA(args)
    self.model = self.model.cuda()

    # Vocabulary mapping instruction words to indices
    # ('ball' and 'sphere' intentionally share index 6)
    self.word_to_idx = {
        'Go': 0, 'to': 1, 'the': 2, 'red': 3, 'object': 4, 'cylinder': 5,
        'ball': 6, 'sphere': 6, 'blue': 7, 'cube': 8, 'green': 9,
        'yellow': 10, 'go': 11, 'any': 12, 'then': 13
    }

    if args.load != "0":
        print("Loading model ... " + args.load)
        self.model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))

    self.model.eval()
    self.start = True
    self.episode_length = 0
def __init__(self, params, gpu_id=0):
    self.params = params
    self.device = "cuda:" + str(gpu_id)

    self.model = A3C_LSTM_GA().to(self.device)
    self.optimizer = optim.Adam(self.model.parameters(),
                                lr=params.lr,
                                amsgrad=params.amsgrad,
                                weight_decay=params.weight_decay)

    # LSTM hidden/cell states, one row per worker process
    self.hx = torch.zeros(self.params.n_process, 256).to(self.device)
    self.cx = torch.zeros(self.params.n_process, 256).to(self.device)

    self.eps_len = 0
    self.values = []
    self.log_probs = []
    self.rewards = []
    # self.entropies = []
    self.done = False
    self.info = None
    self.reward = 0
    self.gpu_id = gpu_id
    self.target = None
    self.n_update = 0
    self.num_steps = params.num_steps
    self.step = 0

    # Rollout storage, shaped (num_steps[+1], n_process, 1); note that the
    # tensor below overwrites the self.rewards list defined above.
    self.rewards = torch.zeros(self.num_steps, params.n_process, 1).to(self.device)
    self.value_preds = torch.zeros(self.num_steps + 1, params.n_process, 1).to(self.device)
    self.returns = torch.zeros(self.num_steps + 1, params.n_process, 1).to(self.device)
    self.action_log_probs = torch.zeros(self.num_steps, params.n_process, 1).to(self.device)
    self.masks = torch.ones(self.num_steps + 1, params.n_process, 1).to(self.device)
    self.entropies = torch.zeros(self.num_steps, params.n_process, 1).to(self.device)
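# Hedged sketch (helper not present in the original): one way the `returns`
# buffer defined above could be filled with discounted returns, in the usual
# A3C fashion. The function name, the `agent` argument, and the assumption
# that a discount factor `gamma` is available are all illustrative.
def compute_returns(agent, next_value, gamma):
    # Bootstrap from the value estimate of the state after the last step.
    agent.returns[-1] = next_value
    for step in reversed(range(agent.rewards.size(0))):
        # masks zero out the bootstrap across episode boundaries
        agent.returns[step] = (agent.returns[step + 1] * gamma *
                               agent.masks[step + 1] + agent.rewards[step])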
        args.use_train_instructions = 0
        args.num_processes = 0
        log_filename = "test-ZSL.log"
    else:
        assert False, "Invalid evaluation type"

    env = grounding_env.GroundingEnv(args)
    args.input_size = len(env.word_to_idx)

    # Setup logging
    if not os.path.exists(args.dump_location):
        os.makedirs(args.dump_location)
    logging.basicConfig(filename=args.dump_location + log_filename,
                        level=logging.INFO)

    shared_model = A3C_LSTM_GA(args)

    # Load the model
    if args.load != "0":
        shared_model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))
    shared_model.share_memory()

    processes = []

    # Start the test thread
    p = mp.Process(target=test, args=(args.num_processes, args, shared_model))
    p.start()
    processes.append(p)
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    # Fall back to a local optimizer when no shared optimizer is provided
    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        # optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            # Sync the local model with the shared one and reset LSTM state
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(
                observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)
            Agent.put_reward(rew, entropy, value, log_prob)

            # Update every num_steps steps or at the end of an episode
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer,
                               params)

            if done:
                break
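# Hedged sketch of the helper `get_instruction_idx` used above; its real
# implementation is not shown in this file. It is assumed to turn a target
# room name into a 1 x T array of word indices that torch.LongTensor accepts.
# The name suffix, the instruction template, and the vocabulary argument are
# purely illustrative.
def get_instruction_idx_sketch(target, word_to_idx):
    instruction = "go to the " + target
    instruction_idx = [word_to_idx[word] for word in instruction.split(" ")]
    return np.array(instruction_idx).reshape(1, -1)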
def test(rank, args, shared_model):
    torch.manual_seed(args.seed + rank)

    env = grounding_env.GroundingEnv(args)
    env.game_init()

    model = A3C_LSTM_GA(args)

    if args.load != "0":
        print("Loading model ... " + args.load)
        model.load_state_dict(
            torch.load(args.load, map_location=lambda storage, loc: storage))
    model.eval()

    (image, instruction), _, _, _ = env.reset()

    # Print instruction while evaluating and visualizing
    if args.evaluate != 0 and args.visualize == 1:
        print("Instruction: {} ".format(instruction))

    # Getting indices of the words in the instruction
    instruction_idx = []
    for word in instruction.split(" "):
        instruction_idx.append(env.word_to_idx[word])
    instruction_idx = np.array(instruction_idx)

    image = torch.from_numpy(image).float() / 255.0
    instruction_idx = torch.from_numpy(instruction_idx).view(1, -1)

    reward_sum = 0
    done = True

    start_time = time.time()

    episode_length = 0
    rewards_list = []
    accuracy_list = []
    episode_length_list = []
    num_episode = 0
    best_reward = 0.0
    test_freq = 50

    while True:
        episode_length += 1
        if done:
            if args.evaluate == 0:
                model.load_state_dict(shared_model.state_dict())

            cx = Variable(torch.zeros(1, 256), volatile=True)
            hx = Variable(torch.zeros(1, 256), volatile=True)
        else:
            cx = Variable(cx.data, volatile=True)
            hx = Variable(hx.data, volatile=True)

        tx = Variable(torch.from_numpy(np.array([episode_length])).long(),
                      volatile=True)

        value, logit, (hx, cx) = model(
            (Variable(image.unsqueeze(0), volatile=True),
             Variable(instruction_idx, volatile=True), (tx, hx, cx)))
        prob = F.softmax(logit)
        action = prob.max(1)[1].data.numpy()

        (image, _), reward, done, _ = env.step(action[0])

        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        if done:
            num_episode += 1
            rewards_list.append(reward_sum)
            # Print reward while evaluating and visualizing
            if args.evaluate != 0 and args.visualize == 1:
                print("Total reward: {}".format(reward_sum))

            episode_length_list.append(episode_length)
            if reward == CORRECT_OBJECT_REWARD:
                accuracy = 1
            else:
                accuracy = 0
            accuracy_list.append(accuracy)

            if len(rewards_list) >= test_freq:
                print(" ".join([
                    "Time {},".format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time))),
                    "Avg Reward {},".format(np.mean(rewards_list)),
                    "Avg Accuracy {},".format(np.mean(accuracy_list)),
                    "Avg Ep length {},".format(np.mean(episode_length_list)),
                    "Best Reward {}".format(best_reward)
                ]))
                logging.info(" ".join([
                    "Time {},".format(
                        time.strftime("%Hh %Mm %Ss",
                                      time.gmtime(time.time() - start_time))),
                    "Avg Reward {},".format(np.mean(rewards_list)),
                    "Avg Accuracy {},".format(np.mean(accuracy_list)),
                    "Avg Ep length {},".format(np.mean(episode_length_list)),
                    "Best Reward {}".format(best_reward)
                ]))

                if np.mean(rewards_list) >= best_reward and args.evaluate == 0:
                    torch.save(model.state_dict(),
                               args.dump_location + "model_best")
                    best_reward = np.mean(rewards_list)

                rewards_list = []
                accuracy_list = []
                episode_length_list = []

            reward_sum = 0
            episode_length = 0

            (image, instruction), _, _, _ = env.reset()
            # Print instruction while evaluating and visualizing
            if args.evaluate != 0 and args.visualize == 1:
                print("Instruction: {} ".format(instruction))

            # Getting indices of the words in the instruction
            instruction_idx = []
            for word in instruction.split(" "):
                instruction_idx.append(env.word_to_idx[word])
            instruction_idx = np.array(instruction_idx)
            instruction_idx = torch.from_numpy(instruction_idx).view(1, -1)

        # Convert the latest observation for the next forward pass
        image = torch.from_numpy(image).float() / 255.0
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    # time.sleep(rank * 30)
    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  # reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target

            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break

            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    # if best_acc.value >= best_rate:
                    #     best_rate = best_acc.value
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(
                                Agent.model.state_dict(),
                                params.weight_dir + 'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    # if best_rate > best_acc.value:
                    #     best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(
                    best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length,
             unseen_succ, unseen_length):
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()

    Agent = run_agent(model, gpu_id)
    n_test = 0
    start_time = time.time()

    while True:
        house_id = rank + (n_test * params.n_process)

        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id - (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  # reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target

            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1

                step += 1

                if done:
                    break

            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100
            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)

            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)

            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])

        n_test += 1
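# Hedged sketch (not in the original file) of how the shared counters that
# run_test fills could be summarized after all test processes join. The split
# sizes (20 seen houses, 50 unseen houses, params.n_test episodes each) are
# assumptions read off the house-id logic above.
def report_results(params, seen_succ, seen_length, unseen_succ, unseen_length):
    seen_episodes = 20 * params.n_test
    unseen_episodes = 50 * params.n_test
    print("Seen:   success rate {:.2f}%, avg length {:.2f}".format(
        100.0 * seen_succ.value / seen_episodes,
        seen_length.value / seen_episodes))
    print("Unseen: success rate {:.2f}%, avg length {:.2f}".format(
        100.0 * unseen_succ.value / unseen_episodes,
        unseen_length.value / unseen_episodes))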