def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    # Prepare checkpoint/log directories and per-run logging.
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    # Seed CPU/GPU RNGs differently per worker.
    torch.manual_seed(random.randint(0, 1000) + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(random.randint(0, 1000) + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    # Each worker trains in one house; wrap around when rank exceeds the pool.
    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    n_train = 0
    best_rate = 0.0
    save_model_index = 0
    while True:
        n_train += 1
        training(task, gpu_id, shared_model, Agent, shared_optimizer, params, lock, count)

        # Periodically sync the local model with the shared one and run an evaluation pass.
        if n_train % 1000 == 0:
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
            start_time = time.time()
            best_rate, save_model_index = testing(lock, n_update, gpu_id, Agent, task,
                                                  best_rate, params, save_model_index,
                                                  start_time, logging, house_id)
def predict_house(student, weights):
    # One score list per house. Each one-vs-all classifier row contributes a
    # sigmoid probability, provided both of its input features are present.
    results = [[], [], [], []]
    house_rev = utils.get_house()
    for row in weights:
        if not np.isnan(student[row[1]]) and not np.isnan(student[row[2]]):
            x = np.array([student[row[1]], student[row[2]]])
            theta = np.array([row[3], row[4], row[5]])
            mean = np.array([row[6], row[7]])
            std = np.array([row[8], row[9]])
            # Standardize the features with the stored mean/std, then prepend the bias term.
            x = (x - mean) / std
            x = np.insert(x, 0, 1, axis=0)
            results[utils.get_house_id(row[0])].append(
                utils.sigmoid(np.dot(x, theta)))
    # Average the probabilities per house; houses with no usable classifier score 0.
    for i in range(4):
        if len(results[i]) != 0:
            # results[i] = max(results[i])
            results[i] = sum(results[i]) / len(results[i])
        else:
            results[i] = 0
    return house_rev[results.index(max(results))]
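# Hypothetical usage sketch for predict_house (not part of the original code).
# It assumes each `weights` row is laid out as
# [house, feature_1, feature_2, theta_0, theta_1, theta_2, mean_1, mean_2, std_1, std_2]
# and that `student` can be indexed by feature name (a dict or pandas Series).
# The feature names and numbers below are made up, and the call still relies on
# the repo's own `utils` module (get_house, get_house_id, sigmoid).
def _predict_house_demo():
    student = {'Herbology': 3.1, 'Astronomy': -487.0}
    weights = [
        ['Gryffindor', 'Herbology', 'Astronomy', 0.2, -1.3, 0.7, 1.1, -370.0, 5.2, 480.0],
        ['Slytherin',  'Herbology', 'Astronomy', -0.4, 0.9, -1.1, 1.1, -370.0, 5.2, 480.0],
    ]
    # Houses without a trained classifier simply score 0 and are never picked.
    print(predict_house(student, weights))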
def run_sim(rank, params, shared_model, shared_optimizer, count, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    # Fall back to a local Adam optimizer when no shared optimizer is passed in.
    if shared_optimizer is None:
        optimizer = optim.Adam(shared_model.parameters(),
                               lr=params.lr,
                               amsgrad=params.amsgrad,
                               weight_decay=params.weight_decay)
        # optimizer.share_memory()
    else:
        optimizer = shared_optimizer

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    for episode in range(params.max_episode):
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)

        with torch.cuda.device(gpu_id):
            target = Variable(torch.LongTensor(target)).cuda()
            # Sync with the shared model and reset the LSTM state for the new episode.
            Agent.model.load_state_dict(shared_model.state_dict())
            Agent.cx = Variable(torch.zeros(1, 256).cuda())
            Agent.hx = Variable(torch.zeros(1, 256).cuda())
            Agent.target = target

        total_reward, num_steps, good = 0, 0, 0
        Agent.done = False
        done = False
        Agent.eps_len = 0

        while not done:
            num_steps += 1
            observation = next_observation
            act, entropy, value, log_prob = Agent.action_train(observation, target)
            next_observation, reward, done, info = task.step(actions[act[0]])

            rew = np.clip(reward, -1.0, 1.0)
            Agent.put_reward(rew, entropy, value, log_prob)

            # Push a gradient update every num_steps environment steps or at episode end.
            if num_steps % params.num_steps == 0 or done:
                if done:
                    Agent.done = done
                with lock:
                    count.value += 1
                Agent.training(next_observation, shared_model, optimizer, params)

            if done:
                break
def run_sim(rank, params, state_Queue, action_done, actions, reward_Queue, lock):
    ptitle('Training Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_train[rank % len(params.gpu_ids_train)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id > 50:
        house_id = house_id % 50

    env = Environment(api, get_house_id(house_id, params.difficulty), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)

    while True:
        next_observation = task.reset()
        target = task.info['target_room']
        target = get_instruction_idx(target)
        # with torch.cuda.device(gpu_id):
        #     target = Variable(torch.LongTensor(target)).cuda()

        total_reward, num_steps, good = 0, 0, 0
        done = False
        test = False

        while not done:
            num_steps += 1
            observation = next_observation

            # Send (rank, [observation, target]) to the master process and block
            # until it has chosen an action for this worker.
            state = rank, [observation, target]
            state_Queue.put(state)
            state_Queue.join()
            # action_done.get()  # action done

            action = actions[rank]
            if action == 99:
                test = True
                break  # call for test

            next_observation, reward, done, info = task.step(action)

            # Make the reward sparse: keep only the -1 / +10 terminal signals.
            reward = np.clip(reward, -1.0, 10.0)
            if reward != -1.0 and reward != 10.0:
                reward = 0.0
            total_reward += reward

            rew = [rank, done, reward]
            # print("send - rank: {:d}, reward: {:3.2f}".format(rank, reward))
            reward_Queue.put(rew)
            reward_Queue.join()

            if done:
                break
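# Hypothetical master-side step for the queue-based worker above (not from the
# original code). The worker put()s its state and blocks on state_Queue.join(),
# so the master must get() every pending item, write the chosen action into the
# shared `actions` array, and call task_done() to release the workers; the same
# handshake is repeated for reward_Queue. `policy` is a stand-in for whatever
# maps (observation, target) to an integer action index.
def _master_step_demo(state_Queue, reward_Queue, actions, policy, n_process):
    # Collect one state from every worker (acts as a barrier).
    batch = [state_Queue.get() for _ in range(n_process)]
    for rank, (observation, target) in batch:
        actions[rank] = policy(observation, target)
    for _ in range(n_process):
        state_Queue.task_done()  # unblocks the workers' state_Queue.join()

    # Acknowledge the (rank, done, reward) messages sent back after stepping.
    rewards = [reward_Queue.get() for _ in range(n_process)]
    for _ in range(n_process):
        reward_Queue.task_done()
    return rewards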
def test(rank, params, shared_model, count, lock, best_acc, evaluation=True):
    if not os.path.exists('./' + params.weight_dir):
        os.mkdir('./' + params.weight_dir)
    if not os.path.exists('./log'):
        os.mkdir('./log')
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)

    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    best_rate = 0.0
    save_model_index = 0
    n_update = 0

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        torch.cuda.manual_seed(params.seed + rank)

    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
    Agent = run_agent(model, gpu_id)

    house_id = params.house_id
    if house_id == -1:
        house_id = rank
    if house_id >= 20:
        house_id = house_id % 20

    # time.sleep(rank*30)
    env = Environment(api, get_house_id(house_id), cfg)
    task = RoomNavTask(env,
                       hardness=params.hardness,
                       segment_input=params.semantic_mode,
                       max_steps=params.max_steps,
                       discrete_action=True)  # reward_type='indicator'

    start_time = time.time()

    if evaluation is True:
        max_episode = params.max_episode
        n_try = params.n_eval
    else:
        max_episode = 1  # for loaded model test
        n_try = params.n_test

    for episode in range(max_episode):
        eval = []
        if evaluation is True:
            # Evaluate the current shared model.
            with lock:
                n_update = count.value
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model.state_dict())
        else:
            # Evaluate a state dict that was loaded from disk.
            with torch.cuda.device(gpu_id):
                Agent.model.load_state_dict(shared_model)
        Agent.model.eval()

        for i in range(n_try):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target

            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1
                step += 1
                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100

            if evaluation is True:  # evaluation mode
                with lock:
                    # if best_acc.value >= best_rate:
                    #     best_rate = best_acc.value
                    # Checkpoint whenever the success rate matches or beats the best so far.
                    if succ_rate >= best_rate:
                        best_rate = succ_rate
                        with torch.cuda.device(gpu_id):
                            torch.save(Agent.model.state_dict(),
                                       params.weight_dir + 'model' + str(n_update) + '.ckpt')
                        save_model_index += 1
                    # if best_rate > best_acc.value:
                    #     best_acc.value = best_rate

            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)
            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "N_Update = {:d}\n".format(n_update),
                "House id: {:d}\n".format(house_id),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Best rate {:3.2f}, Success rate {:3.2f}%".format(best_rate, succ_rate)
            ])
            print(msg)
            logging.info(msg)
def run_test(rank, params, loaded_model, lock, seen_succ, seen_length, unseen_succ, unseen_length):
    logging.basicConfig(filename='./log/' + params.log_file + '.log',
                        level=logging.INFO)
    ptitle('Test Agent: {}'.format(rank))
    gpu_id = params.gpu_ids_test[rank % len(params.gpu_ids_test)]
    api = objrender.RenderAPI(w=params.width, h=params.height, device=gpu_id)
    cfg = load_config('config.json')

    torch.manual_seed(params.seed + rank)
    if gpu_id >= 0:
        with torch.cuda.device(gpu_id):
            torch.cuda.manual_seed(params.seed + rank)

    # Load the checkpoint to evaluate onto this worker's GPU.
    model = A3C_LSTM_GA()
    with torch.cuda.device(gpu_id):
        model = model.cuda()
        load_model = torch.load(
            loaded_model,
            map_location=lambda storage, loc: storage.cuda(gpu_id))
        model.load_state_dict(load_model)
        model.eval()
    Agent = run_agent(model, gpu_id)

    n_test = 0
    start_time = time.time()

    while True:
        # Workers sweep over houses in strides of n_process; ids below 20 are
        # training (seen) houses, the rest are held-out (unseen) houses.
        house_id = rank + (n_test * params.n_process)
        if house_id >= 70:
            break
        else:
            if house_id < 20:
                seen = True
                house = get_house_id(house_id)
            else:
                seen = False
                house = get_eval_house_id(house_id - (n_test * params.n_process))

        env = Environment(api, house, cfg)
        task = RoomNavTask(env,
                           hardness=params.hardness,
                           segment_input=params.semantic_mode,
                           max_steps=params.max_steps,
                           discrete_action=True)  # reward_type='indicator'

        eval = []
        for i in range(params.n_test):
            next_observation = task.reset()
            target = task.info['target_room']
            target = get_instruction_idx(target)

            with torch.cuda.device(gpu_id):
                target = Variable(torch.LongTensor(target)).cuda()
                Agent.cx = Variable(torch.zeros(1, 256).cuda())
                Agent.hx = Variable(torch.zeros(1, 256).cuda())
                Agent.target = target

            step, total_rew, good = 0, 0, 0
            done = False

            while not done:
                observation = next_observation
                act = Agent.action_test(observation, target)

                next_observation, rew, done, info = task.step(actions[act[0]])
                total_rew += rew

                if rew == 10:  # success
                    good = 1
                step += 1
                if done:
                    break
            eval.append((step, total_rew, good))

        if len(eval) > 0:
            succ = [e for e in eval if e[2] > 0]
            succ_rate = (len(succ) / len(eval)) * 100
            avg_reward = sum([e[1] for e in eval]) / len(eval)
            avg_length = sum([e[0] for e in eval]) / len(eval)

            if seen:
                msg_seen = "Seen"
                msg_house = house_id
            else:
                msg_seen = "Unseen"
                msg_house = house_id - 20

            msg = " ".join([
                "++++++++++ Task Stats +++++++++++\n",
                "Time {}\n".format(
                    time.strftime("%dd %Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time))),
                "Episode Played: {:d}\n".format(len(eval)),
                "{:s} House id: {:d}\n".format(msg_seen, msg_house),
                "Avg Reward = {:5.3f}\n".format(avg_reward),
                "Avg Length = {:.3f}\n".format(avg_length),
                "Success rate {:3.2f}%".format(succ_rate)
            ])
            print(msg)
            logging.info(msg)

            # Accumulate per-house results into the shared seen/unseen counters.
            with lock:
                if seen:
                    seen_succ.value += len(succ)
                    seen_length.value += sum([e[0] for e in eval])
                else:
                    unseen_succ.value += len(succ)
                    unseen_length.value += sum([e[0] for e in eval])
        n_test += 1
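# Minimal A3C launcher sketch (not the original entry point): shows how the
# training and evaluation workers above are typically wired together, assuming
# `params` is an argparse-style namespace with an `n_process` field and that
# A3C_LSTM_GA is the shared model class used throughout. Passing
# shared_optimizer=None relies on the run_sim variant that builds its own Adam.
def _launch_a3c_demo(params):
    import torch.multiprocessing as mp

    shared_model = A3C_LSTM_GA()
    shared_model.share_memory()  # all workers read from / write to this copy

    count = mp.Value('i', 0)      # global update counter
    lock = mp.Lock()
    best_acc = mp.Value('d', 0.0)

    processes = []
    for rank in range(params.n_process):
        p = mp.Process(target=run_sim,
                       args=(rank, params, shared_model, None, count, lock))
        p.start()
        processes.append(p)

    # One evaluator process tracks the shared model and saves checkpoints.
    p = mp.Process(target=test,
                   args=(params.n_process, params, shared_model, count, lock, best_acc))
    p.start()
    processes.append(p)

    for p in processes:
        p.join()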