def do_test_(house_id, goal_prediction_model, navigation_model, action_type_model, config, action_space,
             meta_data_util, constants, test_dataset, experiment_name, rank, server, logger, vocab,
             goal_type, use_pushover=False):
    """Run the goal-distance evaluation of the decoupled predictor/navigator agent on one house.

    Steps, in order: launch the Unity build for ``house_id`` on ``config["port"]``, initialize
    ``server``, optionally create a Tensorboard client (rank 0 only), build a
    ``HouseDecoupledPredictorNavigatorAgent`` that acts with ``gp.get_argmax_action`` and, when
    ``test_dataset`` is non-empty, call ``test_goal_distance`` on it.

    ``use_pushover`` is accepted for interface compatibility; the pushover logger passed to the
    agent is always ``None``. Returns ``None``.
    """
    logger.log("In Testing...")

    # One simulator build per worker, addressed through the port stored in the config.
    simulator_ports = [config["port"]]
    house_tag = str(house_id)
    launch_k_unity_builds(
        simulator_ports,
        "./house_" + house_tag + "_elmer.x86_64",
        arg_str="--config ./AssetsHouse/config" + house_tag + ".json",
        cwd="./simulators/house/",
    )
    logger.log("Launched Builds.")

    server.initialize_server()
    logger.log("Server Initialized.")

    # Test policy: always pick the arg-max action.
    greedy_policy = gp.get_argmax_action

    # Only client 0 creates a tensorboard server; every other rank logs nothing.
    tensorboard = None
    if rank == 0:
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server.')

    # Pushover notifications are disabled regardless of ``use_pushover``.
    pushover_logger = None

    tmp_agent = HouseDecoupledPredictorNavigatorAgent(
        server=server,
        goal_prediction_model=goal_prediction_model,
        navigation_model=navigation_model,
        action_type_model=action_type_model,
        test_policy=greedy_policy,
        action_space=action_space,
        meta_data_util=meta_data_util,
        config=config,
        constants=constants,
    )
    logger.log("Created Agent.")

    if len(test_dataset) <= 0:
        return

    # Only the goal-distance evaluation is active; the other evaluation modes
    # (test_single_step, test_multi_step, test_multi_step_action_types) were left
    # commented out in the original code.
    tmp_agent.test_goal_distance(
        house_id,
        test_dataset,
        vocab,
        goal_type=goal_type,
        tensorboard=tensorboard,
        logger=logger,
        pushover_logger=pushover_logger,
    )
def do_test(house_id, chaplot_baseline, config, action_space, meta_data_util, constants, test_dataset,
            experiment_name, rank, server, logger):
    """Evaluate ``chaplot_baseline`` on ``test_dataset`` inside the house simulator.

    Launches the Unity build for ``house_id`` on ``config["port"]``, initializes the server and
    clears its metadata, wraps the model in a ``TmpHouseAgent`` that acts with
    ``gp.get_argmax_action``, and calls ``agent.test`` with ``vocab=None``. Only rank 0 gets a
    Tensorboard client. Returns ``None``.
    """
    # Launch the Unity build
    simulator_ports = [config["port"]]
    house_tag = str(house_id)
    launch_k_unity_builds(
        simulator_ports,
        "./house_" + house_tag + "_elmer.x86_64",
        arg_str="--config ./AssetsHouse/config" + house_tag + ".json",
        cwd="./simulators/house/",
    )

    # Initialize the server
    server.initialize_server()
    server.clear_metadata()
    logger.log("Server Initialized")

    # Test policy: always pick the arg-max action.
    greedy_policy = gp.get_argmax_action

    agent = TmpHouseAgent(
        server=server,
        model=chaplot_baseline,
        test_policy=greedy_policy,
        action_space=action_space,
        meta_data_util=meta_data_util,
        config=config,
        constants=constants,
    )

    # Client 0 creates a tensorboard server; other ranks run without one.
    tensorboard = None
    if rank == 0:
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server...')

    agent.test(test_dataset, vocab=None, tensorboard=tensorboard, logger=logger)
def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants, train_dataset,
              tune_dataset, experiment, experiment_name, rank, server, logger, model_type, vocab,
              use_pushover=False):
    """Train a house agent with the asynchronous contextual-bandit learner.

    Launches the Unity build for ``house_id``, initializes ``server``, builds a local rollout
    model of ``model_type`` and a ``TmpHouseAgent`` around it, then loops over epochs. For every
    training data point the local model is synced from ``shared_model``, a trajectory is sampled
    from the model's action distribution (at most ``constants["horizon"] +
    constants["max_extra_horizon"]`` actions), and ``learner.do_update`` is called on the
    collected replay items. After every epoch the local model is saved under ``experiment`` and,
    if ``tune_dataset`` is non-empty, the agent is tested on it (this test also runs once
    before training).

    The number of epochs is hard-coded to 100000 (the ``constants["max_epochs"]`` lookup is
    disabled). ``use_pushover`` has no effect: the pushover logger is always ``None``.
    """
    logger.log("In Training...")

    simulator_ports = [config["port"]]
    house_tag = str(house_id)
    launch_k_unity_builds(
        simulator_ports,
        "./house_" + house_tag + "_elmer.x86_64",
        arg_str="--config ./AssetsHouse/config" + house_tag + ".json",
        cwd="./simulators/house/",
    )
    logger.log("Launched Builds.")

    server.initialize_server()
    logger.log("Server Initialized.")

    # Policy used by the agent at test time: arg-max over action probabilities.
    greedy_policy = gp.get_argmax_action

    # Client 0 creates a tensorboard server; other ranks skip all scalar logging.
    tensorboard = None
    if rank == 0:
        tensorboard = Tensorboard(experiment_name)
        logger.log('Created Tensorboard Server.')

    # Pushover notifications are disabled regardless of ``use_pushover``.
    pushover_logger = None

    # Local model used for rollouts; it is re-synced from the shared model per data point.
    local_model = model_type(config, constants)

    tmp_agent = TmpHouseAgent(
        server=server,
        model=local_model,
        test_policy=greedy_policy,
        action_space=action_space,
        meta_data_util=meta_data_util,
        config=config,
        constants=constants,
    )
    logger.log("Created Agent.")

    action_counts = [0] * action_space.num_actions()
    max_epochs = 100000  # constants["max_epochs"]
    dataset_size = len(train_dataset)
    has_tune_data = len(tune_dataset) > 0

    if has_tune_data:
        # Test on tuning data before any training
        tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)

    # Learner that computes the loss and performs the update.
    learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space,
                                              meta_data_util, config, constants, tensorboard)
    learner.logger = logger

    for epoch in range(1, max_epochs + 1):

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"] + constants["max_extra_horizon"]

            image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
            instruction = data_point.get_instruction()

            state = AgentObservedState(instruction=instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)
            state.goal = learner.get_goal(metadata)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            # Becomes True only when the sampled action is STOP; stays False when the
            # rollout is cut off by the horizon.
            agent_chose_stop = False

            while num_actions < max_num_actions:

                # Sample an action from the current policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    agent_chose_stop = True
                    break

                # Send the action and get feedback
                image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)

                # Store the transition in the replay list
                batch_replay_items.append(
                    ReplayMemoryItem(state, action, reward, log_prob=log_probabilities))

                # Advance the agent state
                state = state.update(image, action, data_point=data_point)
                state.goal = learner.get_goal(metadata)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
            total_reward += reward

            # The STOP transition is stored only when the agent itself sampled STOP.
            if agent_chose_stop:
                batch_replay_items.append(
                    ReplayMemoryItem(state, action_space.get_stop_action_index(), reward,
                                     log_prob=log_probabilities))

            # Perform update (skipped when nothing was collected)
            if len(batch_replay_items) <= 0:
                continue
            loss_val = learner.do_update(batch_replay_items)

            if tensorboard is None:
                continue

            tensorboard.log_scalar("loss", loss_val)
            entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
            tensorboard.log_scalar("entropy", entropy)
            ratio = float(learner.ratio.data[0])
            tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
            tensorboard.log_scalar("total_reward", total_reward)
            tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

            # Optional auxiliary losses: each is logged only when the learner populated it.
            if learner.action_prediction_loss is not None:
                action_prediction_loss = float(learner.action_prediction_loss.data[0])
                learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
            if learner.temporal_autoencoder_loss is not None:
                temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
            if learner.object_detection_loss is not None:
                object_detection_loss = float(learner.object_detection_loss.data[0])
                tensorboard.log_object_detection_loss(object_detection_loss)
            if learner.symbolic_language_prediction_loss is not None:
                symbolic_language_prediction_loss = float(
                    learner.symbolic_language_prediction_loss.data[0])
                tensorboard.log_scalar("sym_language_prediction_loss",
                                       symbolic_language_prediction_loss)
            if learner.goal_prediction_loss is not None:
                goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

        # Save the model once per epoch
        checkpoint_path = experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)
        local_model.save_model(checkpoint_path)
        logger.log("Training data action counts %r" % action_counts)

        if has_tune_data:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)
def do_train_(shared_model, config, action_space, meta_data_util, constants, train_dataset, tune_dataset,
              experiment, experiment_name, rank, server, logger, model_type, vocab, use_pushover=False):
    """Train a house agent with supervised learning on the data points' own trajectories.

    Launches the ``house_3`` Unity build on ``config["port"]``, initializes ``server``, builds a
    local rollout model of ``model_type`` and a ``TmpHouseAgent`` around it, then runs
    ``constants["max_epochs"]`` epochs. For every training data point the local model is synced
    from ``shared_model``, the first 300 actions of ``data_point.get_trajectory()`` are replayed
    in the simulator followed by a final halt, and ``learner.do_update`` is called on the
    collected replay items. After every epoch the local model is saved under ``experiment`` and
    the agent is tested on ``tune_dataset`` when it is non-empty.

    ``use_pushover`` has no effect: the pushover logger is always ``None``.
    """
    print("In training...")

    launch_k_unity_builds([config["port"]], "./simulators/house_3_elmer.x86_64")
    server.initialize_server()
    print("launched builds")

    # Policy used by the agent at test time: arg-max over action probabilities.
    greedy_policy = gp.get_argmax_action

    # Client 0 creates a tensorboard server; other ranks skip all scalar logging.
    tensorboard = Tensorboard(experiment_name) if rank == 0 else None

    # Pushover notifications are disabled regardless of ``use_pushover``.
    pushover_logger = None

    # Local model used for rollouts; it is re-synced from the shared model per data point.
    local_model = model_type(config, constants)

    logger.log("STARTING AGENT")
    tmp_agent = TmpHouseAgent(
        server=server,
        model=local_model,
        test_policy=greedy_policy,
        action_space=action_space,
        meta_data_util=meta_data_util,
        config=config,
        constants=constants,
    )
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    has_tune_data = len(tune_dataset) > 0

    # Learner that computes the loss and performs the update.
    learner = TmpSupervisedLearning(shared_model, local_model, action_space, meta_data_util,
                                    config, constants, tensorboard)

    for epoch in range(1, max_epochs + 1):

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
            instruction = data_point.get_instruction()

            state = AgentObservedState(instruction=instruction,
                                       config=config,
                                       constants=constants,
                                       start_image=image,
                                       previous_action=None,
                                       data_point=data_point)

            model_state = None
            batch_replay_items = []
            total_reward = 0

            # Demonstration actions, capped at the first 300.
            trajectory = data_point.get_trajectory()[0:300]

            for action in trajectory:

                # Score the current state; the action itself comes from the trajectory.
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                action_counts[action] += 1

                # Send the action and get feedback
                image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)

                # Store the transition in the replay list
                batch_replay_items.append(
                    ReplayMemoryItem(state, action, reward, log_prob=log_probabilities))

                # Advance the agent state
                state = state.update(image, action, data_point=data_point)
                total_reward += reward

            # Score the final state, then send the final STOP action and get feedback
            log_probabilities, model_state, image_emb_seq, state_feature = \
                local_model.get_probs(state, model_state)
            image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
            total_reward += reward

            # The STOP transition is always stored
            batch_replay_items.append(
                ReplayMemoryItem(state, action_space.get_stop_action_index(), reward,
                                 log_prob=log_probabilities))

            # Perform update
            if len(batch_replay_items) <= 0:
                continue
            loss_val = learner.do_update(batch_replay_items)

            if tensorboard is None:
                continue

            # Only referenced by the disabled per-action normalisation of loss/entropy.
            num_actions = len(trajectory) + 1
            tensorboard.log_scalar("loss_val", loss_val)  # /float(num_actions))
            entropy = float(learner.entropy.data[0])  # /float(num_actions)
            tensorboard.log_scalar("entropy", entropy)
            ratio = float(learner.ratio.data[0])
            tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)

            # Optional auxiliary losses: each is logged only when the learner populated it.
            if learner.action_prediction_loss is not None:
                action_prediction_loss = float(learner.action_prediction_loss.data[0])
                learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
            if learner.temporal_autoencoder_loss is not None:
                temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
            if learner.object_detection_loss is not None:
                object_detection_loss = float(learner.object_detection_loss.data[0])
                tensorboard.log_object_detection_loss(object_detection_loss)
            if learner.symbolic_language_prediction_loss is not None:
                symbolic_language_prediction_loss = float(
                    learner.symbolic_language_prediction_loss.data[0])
                tensorboard.log_scalar("sym_language_prediction_loss",
                                       symbolic_language_prediction_loss)
            if learner.goal_prediction_loss is not None:
                goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)
            if learner.mean_factor_entropy is not None:
                mean_factor_entropy = float(learner.mean_factor_entropy.data[0])
                tensorboard.log_factor_entropy_loss(mean_factor_entropy)

        # Save the model once per epoch
        checkpoint_path = experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch)
        local_model.save_model(checkpoint_path)
        logger.log("Training data action counts %r" % action_counts)

        if has_tune_data:
            # Test on tuning data
            print("Going for testing")
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)
            print("Done testing")
def main():
    """Dump numpy images for the blocks test set through a freshly created agent.

    Steps, in order:

    1. Create ``./results/blocks_save_image-test`` if missing and open the master logger on
       ``train_baseline.log`` inside it.
    2. Load ``data/blocks/config.json`` and ``data/shared/contextual_bandit_constants.json``,
       validate them with ``BlocksSetupValidator`` and log both, plus this script's contents.
    3. Build the vocabulary from ``./Assets/vocab_both`` (one token per line, lower-cased, index =
       line number) with ``$UNK$`` as the extra last index; store the size in
       ``config["vocab_size"]``.
    4. Create the models, parse ``testset.json``, launch the blocks Unity build on the first free
       port, and call ``save_numpy_image`` on a ``TmpBlockAgent``.

    Any exception raised in step 4 is printed with its traceback and then swallowed, so the
    function returns ``None`` either way.
    """
    experiment_name = "blocks_save_image-test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log(" STARING NEW EXPERIMENT ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # Log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        # Iterating the handle yields the same lines as readlines() without
        # materialising the whole script in memory first.
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary. The file is read inside a ``with`` block so the handle is
    # closed deterministically (the previous bare open(...).readlines() leaked it).
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    # Later duplicates overwrite earlier ones, exactly like sequential assignment.
    vocab = {tk.strip().lower(): i for i, tk in enumerate(vocab_list)}
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes (only used to decide how many free ports to look for)
    num_processes = 6

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)

        # Make the shared model use shared memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset; everything goes to the train split, the tune split is empty.
        all_train_data = DatasetParser.parse("testset.json", config)
        tune_split = []
        train_split = list(all_train_data[:])

        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        # Start a single server/simulator pair on the first free port. The config is
        # shallow-copied so the shared ``config`` does not pick up the port.
        ports = find_k_ports(num_processes)
        tmp_config = dict(config)
        tmp_config["port"] = ports[0]
        server = BlocksServer(tmp_config, action_space)
        launch_k_unity_builds([ports[0]], "./simulators/blocks/retro_linux_build.x86_64")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the agent and dump the images
        tmp_agent = TmpBlockAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        tmp_agent.save_numpy_image(all_train_data, vocab, "test")

    except Exception:
        # Top-level boundary: report the failure with its traceback instead of crashing.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def do_train(chaplot_baseline, shared_model, config, action_space, meta_data_util, args, constants,
             train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type,
             contextual_bandit, use_pushover=False):
    """Run the (never-ending) training loop of the Chaplot baseline for one worker.

    A local model of ``model_type`` is created for rollouts and synced from ``shared_model`` at
    the start of every iteration. Each iteration collects up to ``args.num_steps`` environment
    steps (an episode is force-stopped once it reaches ``args.max_episode_length``), builds a
    policy loss and a value loss, back-propagates through the local model, copies the gradients
    to ``shared_model`` via ``ChaplotBaseline.ensure_shared_grads`` and steps an Adam optimizer
    over the shared parameters.

    When ``contextual_bandit`` is true the policy weight of a step is its immediate reward;
    otherwise it is the generalized advantage estimate computed from ``args.gamma`` and
    ``args.tau``. Only rank 0 logs to Tensorboard. The function redirects ``sys.stderr`` to
    ``sys.stdout`` and does not return (the loop is ``while True``).

    NOTE(review): tensors are moved with unconditional ``.cuda()`` calls although the model
    itself is only moved when CUDA is available — presumably a GPU is required; confirm.
    """
    sys.stderr = sys.stdout
    server.initialize_server()

    # Local config variables
    lstm_size = 256

    # Policy used by the agent at test time
    greedy_policy = gp.get_argmax_action

    # Client 0 creates a tensorboard server
    tensorboard = Tensorboard(experiment_name) if rank == 0 else None

    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=chaplot_baseline,
                  test_policy=greedy_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    # Local model used for rollouts
    local_model = model_type(args, config=config)
    if torch.cuda.is_available():
        local_model.cuda()
    chaplot_baseline.shared_model = local_model
    local_model.train()

    # Environment interface
    env = NavDroneServerInterface(agent, local_model, experiment, config, constants, None,
                                  train_dataset, tune_dataset, rank, logger, use_pushover)
    env.game_init()
    logger.log("Created NavDroneServerInterface")

    # Adam over the *shared* parameters (the original SGD optimizer was replaced).
    optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)

    p_losses = []
    v_losses = []

    def to_index_row(token_ids):
        # Token id sequence -> tensor viewed as a single row.
        return torch.from_numpy(np.array(token_ids)).view(1, -1)

    def time_step(length):
        # Current episode length wrapped as a long CUDA Variable.
        return Variable(torch.from_numpy(np.array([length])).long().cuda())

    def model_inputs(frame, curr_idx, prev_idx, next_idx, recurrent):
        # Input tuple in the layout the local model is called with.
        return (Variable(frame.unsqueeze(0).cuda()),
                Variable(curr_idx.cuda()),
                Variable(prev_idx.cuda()),
                Variable(next_idx.cuda()),
                recurrent)

    launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64")

    (image, instr), _, _ = env.reset()
    curr_instr, prev_instr, next_instr = instr
    image = torch.from_numpy(image).float()
    curr_instruction_idx = to_index_row(curr_instr)
    prev_instruction_idx = to_index_row(prev_instr)
    next_instruction_idx = to_index_row(next_instr)

    done = True
    episode_length = 0
    num_iters = 0

    while True:

        # Sync with the shared model
        local_model.load_state_dict(shared_model.state_dict())

        if done:
            # New episode: reset the length and start from a zero recurrent state.
            episode_length = 0
            cx = Variable(torch.zeros(1, lstm_size).cuda())
            hx = Variable(torch.zeros(1, lstm_size).cuda())
        else:
            # Same episode: carry the recurrent state over, re-wrapped from its data.
            cx = Variable(cx.data.cuda())
            hx = Variable(hx.data.cuda())

        values = []
        log_probs = []
        rewards = []
        entropies = []
        cached_information = None

        for _ in range(args.num_steps):
            episode_length += 1
            tx = time_step(episode_length)

            value, logit, (hx, cx), cached_information = local_model(
                model_inputs(image, curr_instruction_idx, prev_instruction_idx,
                             next_instruction_idx, (tx, hx, cx)),
                cached_information)

            prob = F.softmax(logit, dim=1)
            log_prob = F.log_softmax(logit, dim=1)
            entropies.append(-(log_prob * prob).sum(1))

            # Sample an action and keep the log-probability of the sampled action only.
            action = prob.multinomial().data
            log_prob = log_prob.gather(1, Variable(action.cuda()))
            action = action.cpu().numpy()[0, 0]

            (image, _), reward, done, _ = env.step(action)

            if not done and (episode_length >= args.max_episode_length):
                # Episode hit the length cap without stopping: send the stop action.
                _, _, done, _ = env.step(
                    env.client.agent.action_space.get_stop_action_index())
                done = True

            if done:
                (image, instr), _, _ = env.reset()
                curr_instr, prev_instr, next_instr = instr
                curr_instruction_idx = to_index_row(curr_instr)
                prev_instruction_idx = to_index_row(prev_instr)
                next_instruction_idx = to_index_row(next_instr)

            # ``image`` comes back from the environment as a numpy array on every step.
            image = torch.from_numpy(image).float()

            values.append(value)
            log_probs.append(log_prob)
            rewards.append(reward)

            if done:
                break

        if rank == 0 and tensorboard is not None:
            # Log total reward and entropy
            tensorboard.log_scalar("Total_Reward", sum(rewards))
            mean_entropy = sum(entropies).data[0] / float(max(episode_length, 1))
            tensorboard.log_scalar("Chaplot_Baseline_Entropy", mean_entropy)

        # Return used to seed the backward pass: zero at episode end, otherwise the
        # model's value for the current observation.
        R = torch.zeros(1, 1)
        if not done:
            tx = time_step(episode_length)
            value, _, _, _ = local_model(
                model_inputs(image, curr_instruction_idx, prev_instruction_idx,
                             next_instruction_idx, (tx, hx, cx)))
            R = value.data

        values.append(Variable(R.cuda()))
        policy_loss = 0
        value_loss = 0
        R = Variable(R.cuda())
        gae = torch.zeros(1, 1).cuda()

        # Walk the collected steps from last to first.
        for i in range(len(rewards) - 1, -1, -1):
            R = args.gamma * R + rewards[i]
            advantage = R - values[i]
            value_loss = value_loss + 0.5 * advantage.pow(2)

            if contextual_bandit:
                # Just focus on immediate reward
                gae = torch.from_numpy(np.array([[rewards[i]]])).float()
            else:
                # Generalized Advantage Estimation
                delta_t = rewards[i] + args.gamma * \
                    values[i + 1].data - values[i].data
                gae = gae * args.gamma * args.tau + delta_t

            policy_loss = policy_loss - \
                log_probs[i] * Variable(gae.cuda()) - 0.02 * entropies[i]

        optimizer.zero_grad()

        p_losses.append(policy_loss.data[0, 0])
        v_losses.append(value_loss.data[0, 0])

        if len(p_losses) > 1000:
            # Report running averages, then start a fresh window.
            num_iters += 1
            summary = [
                "Num iters: {}K".format(num_iters),
                "Avg policy loss: {}".format(np.mean(p_losses)),
                "Avg value loss: {}".format(np.mean(v_losses)),
            ]
            logger.log(" ".join(summary))
            p_losses = []
            v_losses = []

        total_loss = policy_loss + 0.5 * value_loss
        total_loss.backward()
        torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)
        ChaplotBaseline.ensure_shared_grads(local_model, shared_model)
        optimizer.step()
def do_train(chaplot_baseline, shared_model, config, action_space, meta_data_util, args, constants,
             train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger, model_type,
             contextual_bandit=False, use_pushover=False):
    """Run the training loop of the Chaplot baseline with an auxiliary goal-prediction loss.

    A local model of ``model_type`` is created for rollouts and synced from ``shared_model`` at
    the start of every iteration. Each iteration collects up to ``args.num_steps`` environment
    steps (an episode is force-stopped once it reaches ``args.max_episode_length``), builds a
    policy loss, a value loss and ``local_model.calc_goal_prediction_loss(...)``, back-propagates
    their weighted sum, copies the gradients to ``shared_model`` via
    ``ChaplotBaselineWithAuxiliary.ensure_shared_grads`` and steps an Adam optimizer over the
    shared parameters. The entropy coefficient decays with ``env.num_epochs`` and is floored at 0.

    The temporal-autoencoding, reward-prediction and alignment losses are disabled (set to
    ``None``); their logging and loss terms are kept but never execute.

    The whole body runs inside ``try``/``except Exception``: any failure is printed with its
    traceback and the function then returns ``None`` instead of raising. ``sys.stderr`` is
    redirected to ``sys.stdout``.

    NOTE(review): tensors are moved with unconditional ``.cuda()`` calls although the model
    itself is only moved when CUDA is available — presumably a GPU is required; confirm.
    """
    try:
        sys.stderr = sys.stdout
        server.initialize_server()

        # Local config variables
        lstm_size = 256

        # Policy used by the agent at test time
        greedy_policy = gp.get_argmax_action

        # Client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name) if rank == 0 else None

        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=chaplot_baseline,
                      test_policy=greedy_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        # Local model used for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        chaplot_baseline.shared_model = local_model
        local_model.train()

        # Environment interface
        env = NavDroneServerInterface(agent, local_model, experiment, config, constants, None,
                                      train_dataset, tune_dataset, rank, logger, use_pushover)
        logger.log("Created NavDroneServerInterface")

        # Adam over the *shared* parameters (the original SGD optimizer was replaced).
        optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)

        p_losses = []
        v_losses = []

        def to_index_row(token_ids):
            # Token id sequence -> tensor viewed as a single row.
            return torch.from_numpy(np.array(token_ids)).view(1, -1)

        def time_step(length):
            # Current episode length wrapped as a long CUDA Variable.
            return Variable(torch.from_numpy(np.array([length])).long().cuda())

        def model_inputs(frame, curr_idx, prev_idx, next_idx, recurrent):
            # Input tuple in the layout the local model is called with.
            return (Variable(frame.unsqueeze(0).cuda()),
                    Variable(curr_idx.cuda()),
                    Variable(prev_idx.cuda()),
                    Variable(next_idx.cuda()),
                    recurrent)

        launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64")

        (image, instr), _, _, metadata, data_point = env.reset()
        curr_instr, prev_instr, next_instr = instr
        image = torch.from_numpy(image).float()
        curr_instruction_idx = to_index_row(curr_instr)
        prev_instruction_idx = to_index_row(prev_instr)
        next_instruction_idx = to_index_row(next_instr)

        done = True
        episode_length = 0
        num_iters = 0
        cx, hx = None, None
        global_id = 1

        while True:

            # Sync with the shared model
            local_model.load_state_dict(shared_model.state_dict())

            if done:
                # New episode: reset the length and start from a zero recurrent state.
                episode_length = 0
                cx = Variable(torch.zeros(1, lstm_size).cuda())
                hx = Variable(torch.zeros(1, lstm_size).cuda())
            else:
                # Same episode: carry the recurrent state over, re-wrapped from its data.
                cx = Variable(cx.data.cuda())
                hx = Variable(hx.data.cuda())

            values = []
            log_probs = []
            rewards = []
            entropies = []
            cached_information = None

            # Per-step records consumed by the auxiliary objectives.
            lstm_rep = []
            image_rep = []
            actions = []
            goal_locations = []

            for _ in range(args.num_steps):
                episode_length += 1
                tx = time_step(episode_length)

                value, logit, (hx, cx), cached_information = local_model(
                    model_inputs(image, curr_instruction_idx, prev_instruction_idx,
                                 next_instruction_idx, (tx, hx, cx)),
                    cached_information)

                prob = F.softmax(logit, dim=1)
                log_prob = F.log_softmax(logit, dim=1)
                entropies.append(-(log_prob * prob).sum(1))

                action = prob.multinomial().data

                # Record representations, the sampled action and the goal location
                # for the state the action was sampled in.
                lstm_rep.append(cached_information["lstm_rep"])
                image_rep.append(cached_information["image_rep"])
                actions.append(action)
                goal_locations.append(
                    ChaplotBaselineWithAuxiliary.get_goal_location(metadata, data_point))
                global_id += 1

                # Keep the log-probability of the sampled action only.
                log_prob = log_prob.gather(1, Variable(action.cuda()))
                action = action.cpu().numpy()[0, 0]

                (image, _), reward, done, _, metadata = env.step(action)

                if not done and (episode_length >= args.max_episode_length):
                    # Episode hit the length cap without stopping: send the stop action.
                    _, _, done, _, metadata = env.step(
                        env.agent.action_space.get_stop_action_index())
                    done = True

                if done:
                    (image, instr), _, _, metadata, data_point = env.reset()
                    curr_instr, prev_instr, next_instr = instr
                    curr_instruction_idx = to_index_row(curr_instr)
                    prev_instruction_idx = to_index_row(prev_instr)
                    next_instruction_idx = to_index_row(next_instr)

                # ``image`` comes back from the environment as a numpy array on every step.
                image = torch.from_numpy(image).float()

                values.append(value)
                log_probs.append(log_prob)
                rewards.append(reward)

                if done:
                    break

            if rank == 0 and tensorboard is not None:
                # Log total reward and entropy
                tensorboard.log_scalar("Total_Reward", sum(rewards))
                mean_entropy = sum(entropies).data[0] / float(max(episode_length, 1))
                tensorboard.log_scalar("Chaplot_Baseline_Entropy", mean_entropy)

            # Return used to seed the backward pass: zero at episode end, otherwise the
            # model's value for the current observation.
            R = torch.zeros(1, 1)
            if not done:
                tx = time_step(episode_length)
                value, _, _, _ = local_model(
                    model_inputs(image, curr_instruction_idx, prev_instruction_idx,
                                 next_instruction_idx, (tx, hx, cx)))
                R = value.data

            values.append(Variable(R.cuda()))
            policy_loss = 0
            value_loss = 0
            R = Variable(R.cuda())
            gae = torch.zeros(1, 1).cuda()

            # Entropy bonus shrinks by 0.01 per environment epoch and never goes negative.
            entropy_coeff = max(0.0, 0.11 - env.num_epochs * 0.01)

            # Walk the collected steps from last to first.
            for i in range(len(rewards) - 1, -1, -1):
                R = args.gamma * R + rewards[i]
                advantage = R - values[i]
                value_loss = value_loss + 0.5 * advantage.pow(2)

                if contextual_bandit:
                    # Just focus on immediate reward
                    gae = torch.from_numpy(np.array([[rewards[i]]])).float()
                else:
                    # Generalized Advantage Estimation
                    delta_t = rewards[i] + args.gamma * \
                        values[i + 1].data - values[i].data
                    gae = gae * args.gamma * args.tau + delta_t

                policy_loss = policy_loss - \
                    log_probs[i] * Variable(gae.cuda()) - entropy_coeff * entropies[i]

            # Auxiliary objectives. Only goal prediction is active; the other three are
            # switched off by leaving them as None.
            temporal_autoencoding_loss = None
            reward_prediction_loss = None
            alignment_loss, alignment_norm = None, None
            goal_prediction_loss = local_model.calc_goal_prediction_loss(
                image_rep, cached_information["text_rep"], goal_locations)

            optimizer.zero_grad()

            p_losses.append(policy_loss.data[0, 0])
            v_losses.append(value_loss.data[0, 0])

            if len(p_losses) > 1000:
                # Report running averages, then start a fresh window.
                num_iters += 1
                summary = [
                    "Num iters: {}K".format(num_iters),
                    "Avg policy loss: {}".format(np.mean(p_losses)),
                    "Avg value loss: {}".format(np.mean(v_losses)),
                ]
                logger.log(" ".join(summary))
                p_losses = []
                v_losses = []

            if rank == 0 and tensorboard is not None:
                if done:
                    stop_dist_error = metadata["stop_dist_error"]
                    tensorboard.log_scalar("train_dist_error", stop_dist_error)
                    # An episode counts as completed when it stops within 5.0 of the goal.
                    task_completion = 1 if metadata["stop_dist_error"] < 5.0 else 0
                    tensorboard.log_scalar("train_task_completion", task_completion)

                tensorboard.log_scalar("Value_Loss", float(value_loss.data))
                if temporal_autoencoding_loss is not None:
                    tensorboard.log_scalar("TAE_Loss", float(temporal_autoencoding_loss.data))
                if reward_prediction_loss is not None:
                    tensorboard.log_scalar("RP_Loss", float(reward_prediction_loss.data))
                if alignment_loss is not None:
                    tensorboard.log_scalar("Mean_Current_Segment_Alignment_Loss",
                                           float(alignment_loss.data))
                    tensorboard.log_scalar("Alignment_Norm", float(alignment_norm.data))
                if goal_prediction_loss is not None:
                    tensorboard.log_scalar(
                        "Goal_Prediction_Loss",
                        float(goal_prediction_loss.data) / float(len(rewards)))

            # Total objective: policy + half of the value loss + half of every active
            # auxiliary loss, accumulated in the original order.
            loss = policy_loss + 0.5 * value_loss
            for auxiliary_loss in (temporal_autoencoding_loss, reward_prediction_loss,
                                   alignment_loss, goal_prediction_loss):
                if auxiliary_loss is not None:
                    loss += 0.5 * auxiliary_loss

            loss.backward()
            torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)
            ChaplotBaselineWithAuxiliary.ensure_shared_grads(local_model, shared_model)
            optimizer.step()

    except Exception:
        print("Exception")
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    """Evaluate a saved IncrementalModelEmnlp model on the blocks dev set.

    Creates the experiment folder and the master logger, loads the blocks
    config and the shared contextual-bandit constants, validates them, logs
    the config, constants and this script's own source, builds the
    token -> index vocabulary, launches the blocks simulator and finally
    runs ``Agent.test`` on ``devset.json``.

    Exceptions raised while creating the model, server or agent (or while
    testing) are printed with their traceback and not re-raised.
    """
    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        " STARING NEW EXPERIMENT ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        # Iterate lazily; there is no need to materialise every line first.
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary (token -> index). The file is read inside a
    # ``with`` block so the handle is closed deterministically.
    vocab = dict()
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    # The unknown-word token takes the index right after the last listed one.
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model(
            "./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)

        agent.test(test_data, tensorboard)

    except Exception:
        # Top-level boundary: report the failure instead of crashing silently.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def do_train_(shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank,
              server, logger, model_type, use_pushover=False):
    """Train one client with an AsynchronousAdvantageActorGAECritic learner.

    For every epoch and every training data point the local model is synced
    from ``shared_model``, an episode is rolled out by sampling actions from
    the local model's action distribution (at most
    ``constants["horizon"] + constants["max_extra_horizon"]`` actions), and
    the collected replay items are handed to the learner for an update.
    After each epoch the local model is saved, training statistics are
    logged and, when ``tune_dataset`` is non-empty, the agent is tested on it.

    Only the client with ``rank == 0`` creates a tensorboard server; a
    pushover logger is created only when ``use_pushover`` is true.
    """

    def _pose_and_position(feedback):
        # Heading is bucketed into 15-degree bins; the raw values are kept too.
        bucket = int(feedback["y_angle"] / 15.0)
        raw = (feedback["x_pos"], feedback["z_pos"], feedback["y_angle"])
        return bucket, raw

    def _state_goal(feedback, example):
        # Goal location attached to the observed state.
        return GoalPrediction.get_goal_location(
            feedback, example, learner.image_height, learner.image_width)

    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # client 0 creates a tensorboard server
    tensorboard = Tensorboard(experiment_name) if rank == 0 else None
    pushover_logger = PushoverLogger(experiment_name) if use_pushover else None

    # Create a local model for rollouts
    local_model = model_type(config, constants)

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = AsynchronousAdvantageActorGAECritic(
        shared_model, local_model, action_space, meta_data_util, config,
        constants, tensorboard)

    # Launch unity
    launch_k_unity_builds([config["port"]],
                          "./simulators/NavDroneLinuxBuild.x86_64")

    for epoch in range(1, max_epochs + 1):

        learner.epoch = epoch
        completed_tasks = 0
        stop_dist_error_sum = 0
        stop_dist_errors = []

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" %
                           (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = (constants["horizon"]
                               + constants["max_extra_horizon"])

            image, metadata = agent.server.reset_receive_feedback(data_point)

            pose, position_orientation = _pose_and_position(metadata)
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=config,
                constants=constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)
            state.goal = _state_goal(metadata, data_point)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            # Stays True unless the policy itself samples the stop action.
            forced_stop = True

            while num_actions < max_num_actions:

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                # Generate goal
                goal = None
                if config["do_goal_prediction"]:
                    goal = learner.goal_prediction_calculator.get_goal_location(
                        metadata, data_point,
                        learner.image_height, learner.image_width)

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = \
                    agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                batch_replay_items.append(
                    ReplayMemoryItem(state, action, reward,
                                     log_prob=log_probabilities,
                                     volatile=volatile, goal=goal))

                # Update the agent state
                pose, position_orientation = _pose_and_position(metadata)
                state = state.update(
                    image, action,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.goal = _state_goal(metadata, data_point)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            # An episode counts as completed when it stops within 5.0 of the goal.
            if metadata["stop_dist_error"] < 5.0:
                completed_tasks += 1
            stop_dist_error_sum += metadata["stop_dist_error"]
            stop_dist_errors.append(metadata["stop_dist_error"])

            if tensorboard is not None:
                tensorboard.log_all_train_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Store it in the replay memory list
            if not forced_stop:
                batch_replay_items.append(
                    ReplayMemoryItem(state,
                                     action_space.get_stop_action_index(),
                                     reward,
                                     log_prob=log_probabilities,
                                     volatile=volatile, goal=goal))

            # Perform update
            if not batch_replay_items:
                continue
            loss_val = learner.do_update(batch_replay_items)

            if tensorboard is None:
                continue

            cross_entropy = float(learner.cross_entropy.data[0])
            tensorboard.log(cross_entropy, loss_val, 0)

            # Entropy and value loss are reported per step (+1 for the stop).
            entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
            v_value_loss_per_step = \
                float(learner.value_loss.data[0]) / float(num_actions + 1)
            tensorboard.log_scalar("entropy", entropy)
            tensorboard.log_scalar("total_reward", total_reward)
            tensorboard.log_scalar("v_value_loss_per_step",
                                   v_value_loss_per_step)
            tensorboard.log_scalar("Abs_objective_to_entropy_ratio",
                                   float(learner.ratio.data[0]))

            # Auxiliary losses are logged only when the learner computed them.
            if learner.action_prediction_loss is not None:
                learner.tensorboard.log_action_prediction_loss(
                    float(learner.action_prediction_loss.data[0]))
            if learner.temporal_autoencoder_loss is not None:
                tensorboard.log_temporal_autoencoder_loss(
                    float(learner.temporal_autoencoder_loss.data[0]))
            if learner.object_detection_loss is not None:
                tensorboard.log_object_detection_loss(
                    float(learner.object_detection_loss.data[0]))
            if learner.symbolic_language_prediction_loss is not None:
                tensorboard.log_scalar(
                    "sym_language_prediction_loss",
                    float(learner.symbolic_language_prediction_loss.data[0]))
            if learner.goal_prediction_loss is not None:
                tensorboard.log_scalar(
                    "goal_prediction_loss",
                    float(learner.goal_prediction_loss.data[0]))

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" +
                               str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        mean_stop_dist_error = stop_dist_error_sum / float(len(train_dataset))
        task_completion_accuracy = \
            (completed_tasks * 100.0) / float(len(train_dataset))
        logger.log("Training: Mean stop distance error %r" %
                   mean_stop_dist_error)
        logger.log("Training: Task completion accuracy %r " %
                   task_completion_accuracy)
        bins = range(0, 80, 3)  # range of distance
        histogram, _ = np.histogram(stop_dist_errors, bins)
        logger.log("Histogram of train errors %r " % histogram)

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test(tune_dataset, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)
def do_train_(shared_model, config, action_space, meta_data_util, args,
              constants, train_dataset, tune_dataset, experiment,
              experiment_name, rank, server, logger, model_type,
              use_pushover=False):
    """Train one client with an AsynchronousContextualBandit learner.

    At the start of every epoch the agent is tested on ``tune_dataset``
    (when it is non-empty). Then, for every training data point, the local
    model is synced from ``shared_model``, an episode of at most
    ``constants["horizon"]`` sampled actions is rolled out, and the collected
    replay items are passed to the learner for an update. The local model is
    saved after each epoch.

    Progress messages go through the ``logger`` argument, like the start-up
    messages in this function, rather than through the root ``logging``
    module.

    Only the client with ``rank == 0`` creates a tensorboard server; a
    pushover logger is created only when ``use_pushover`` is true.
    """
    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    if rank == 0:  # client 0 creates a tensorboard server
        tensorboard = Tensorboard(experiment_name)
    else:
        tensorboard = None

    if use_pushover:
        pushover_logger = PushoverLogger(experiment_name)
    else:
        pushover_logger = None

    # Create a local model for rollouts
    local_model = model_type(args, config=config)
    if torch.cuda.is_available():
        local_model.cuda()
    local_model.train()

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = AsynchronousContextualBandit(shared_model, local_model,
                                           action_space, meta_data_util,
                                           config, constants, tensorboard)

    # Launch unity
    launch_k_unity_builds(
        [config["port"]],
        "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64"
    )

    for epoch in range(1, max_epochs + 1):

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test(tune_dataset, tensorboard=tensorboard,
                       logger=logger, pushover_logger=pushover_logger)

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            # local_model.load_state_dict(shared_model.state_dict())
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                # Use the logger handed to this client so progress messages
                # land in the same place as the start-up messages above.
                logger.log("Done %d out of %d" %
                           (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            # max_num_actions = len(data_point.get_trajectory())
            # max_num_actions += self.constants["max_extra_horizon"]
            max_num_actions = constants["horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)

            # Heading is bucketed into 15-degree bins for the pose.
            pose = int(metadata["y_angle"] / 15.0)
            position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                    metadata["y_angle"])
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=config,
                constants=constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)

            model_state = None
            batch_replay_items = []
            total_reward = 0
            # Stays True unless the policy itself samples the stop action.
            forced_stop = True

            while num_actions < max_num_actions:

                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                probabilities = list(torch.exp(log_probabilities.data))[0]

                # Sample action from the probability
                action = gp.sample_action_from_prob(probabilities)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    forced_stop = False
                    break

                # Send the action and get feedback
                image, reward, metadata = \
                    agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                rewards = learner.get_all_rewards(metadata)
                replay_item = ReplayMemoryItem(state, action, reward,
                                               log_prob=log_probabilities,
                                               all_rewards=rewards)
                batch_replay_items.append(replay_item)

                # Update the agent state
                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"],
                                        metadata["z_pos"],
                                        metadata["y_angle"])
                state = state.update(
                    image, action,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                num_actions += 1
                total_reward += reward

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            rewards = learner.get_all_rewards(metadata)
            total_reward += reward

            if tensorboard is not None:
                tensorboard.log_all_train_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Store it in the replay memory list
            if not forced_stop:
                replay_item = ReplayMemoryItem(
                    state, action_space.get_stop_action_index(), reward,
                    log_prob=log_probabilities, all_rewards=rewards)
                batch_replay_items.append(replay_item)

            # Update the scores based on meta_data
            # self.meta_data_util.log_results(metadata)

            # Perform update
            if len(batch_replay_items) > 0:
                loss_val = learner.do_update(batch_replay_items)
                # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                del batch_replay_items[:]  # in place list clear

                if tensorboard is not None:
                    cross_entropy = float(learner.cross_entropy.data[0])
                    tensorboard.log(cross_entropy, loss_val, 0)
                    entropy = float(learner.entropy.data[0])
                    tensorboard.log_scalar("entropy", entropy)

                    ratio = float(learner.ratio.data[0])
                    tensorboard.log_scalar(
                        "Abs_objective_to_entropy_ratio", ratio)

                    # Auxiliary losses are logged only when the learner
                    # computed them.
                    if learner.action_prediction_loss is not None:
                        action_prediction_loss = float(
                            learner.action_prediction_loss.data[0])
                        learner.tensorboard.log_action_prediction_loss(
                            action_prediction_loss)
                    if learner.temporal_autoencoder_loss is not None:
                        temporal_autoencoder_loss = float(
                            learner.temporal_autoencoder_loss.data[0])
                        tensorboard.log_temporal_autoencoder_loss(
                            temporal_autoencoder_loss)
                    if learner.object_detection_loss is not None:
                        object_detection_loss = float(
                            learner.object_detection_loss.data[0])
                        tensorboard.log_object_detection_loss(
                            object_detection_loss)
                    if learner.symbolic_language_prediction_loss is not None:
                        symbolic_language_prediction_loss = float(
                            learner.symbolic_language_prediction_loss.data[0])
                        tensorboard.log_scalar(
                            "sym_language_prediction_loss",
                            symbolic_language_prediction_loss)
                    if learner.goal_prediction_loss is not None:
                        goal_prediction_loss = float(
                            learner.goal_prediction_loss.data[0])
                        tensorboard.log_scalar("goal_prediction_loss",
                                               goal_prediction_loss)
                    if learner.mean_factor_entropy is not None:
                        mean_factor_entropy = float(
                            learner.mean_factor_entropy.data[0])
                        tensorboard.log_factor_entropy_loss(
                            mean_factor_entropy)

        # Save the model
        local_model.save_model(experiment + "/contextual_bandit_" +
                               str(rank) + "_epoch_" + str(epoch))

    logger.log("Training data action counts %r" % action_counts)
def do_train_(shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank,
              server, logger, model_type, use_pushover=False):
    """Train one client with an AsynchronousSupervisedLearning learner.

    For every epoch and every training data point the local model is synced
    from ``shared_model`` and the data point's own trajectory is replayed in
    the simulator: each trajectory action is executed, and the model's
    log-probabilities for the state it was taken in are stored in a replay
    item. A final stop action is always appended. The replay items are then
    saved through ``AsynchronousSupervisedLearning.save_goal`` and passed to
    the learner for an update. After each epoch the local model is saved and,
    when ``tune_dataset`` is non-empty, goal prediction is tested on it.

    Only the client with ``rank == 0`` creates a tensorboard server; a
    pushover logger is created only when ``use_pushover`` is true.
    """

    def _pose_and_position(feedback):
        # Heading is bucketed into 15-degree bins; the raw values are kept too.
        bucket = int(feedback["y_angle"] / 15.0)
        raw = (feedback["x_pos"], feedback["z_pos"], feedback["y_angle"])
        return bucket, raw

    def _generate_goal(feedback, example):
        # Goal location from the learner's calculator (8 x 8 passed as the
        # last two arguments), or None when goal prediction is off.
        if not config["do_goal_prediction"]:
            return None
        return learner.goal_prediction_calculator.get_goal_location(
            feedback, example, 8, 8)

    server.initialize_server()

    # Test policy
    test_policy = gp.get_argmax_action

    # client 0 creates a tensorboard server
    tensorboard = Tensorboard(experiment_name) if rank == 0 else None
    pushover_logger = PushoverLogger(experiment_name) if use_pushover else None

    # Create a local model for rollouts
    local_model = model_type(config, constants)

    # Create the Agent
    logger.log("STARTING AGENT")
    agent = Agent(server=server,
                  model=local_model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # Create the learner to compute the loss
    learner = AsynchronousSupervisedLearning(
        shared_model, local_model, action_space, meta_data_util, config,
        constants, tensorboard)

    # Launch unity
    launch_k_unity_builds([config["port"]],
                          "./simulators/NavDroneLinuxBuild.x86_64")

    for epoch in range(1, max_epochs + 1):

        learner.epoch = epoch

        for data_point_ix, data_point in enumerate(train_dataset):

            # Sync with the shared model
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (data_point_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" %
                           (data_point_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            trajectory = data_point.get_trajectory()

            image, metadata = agent.server.reset_receive_feedback(data_point)

            pose, position_orientation = _pose_and_position(metadata)
            state = AgentObservedState(
                instruction=data_point.instruction,
                config=config,
                constants=constants,
                start_image=image,
                previous_action=None,
                pose=pose,
                position_orientation=position_orientation,
                data_point=data_point)

            model_state = None
            batch_replay_items = []
            total_reward = 0

            for action in trajectory:

                # Score the current state; the action itself comes from the
                # data point's trajectory, not from the model.
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)

                action_counts[action] += 1

                # Generate goal
                goal = _generate_goal(metadata, data_point)

                # Send the action and get feedback
                image, reward, metadata = \
                    agent.server.send_action_receive_feedback(action)

                # Store it in the replay memory list
                batch_replay_items.append(
                    ReplayMemoryItem(state, action, reward,
                                     log_prob=log_probabilities,
                                     volatile=volatile, goal=goal))

                # Update the agent state
                pose, position_orientation = _pose_and_position(metadata)
                state = state.update(
                    image, action,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                num_actions += 1
                total_reward += reward

            # Score the final state, in which the stop action is taken.
            log_probabilities, model_state, image_emb_seq, volatile = \
                local_model.get_probs(state, model_state)

            # Generate goal
            goal = _generate_goal(metadata, data_point)

            # Send final STOP action and get feedback
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            if tensorboard is not None:
                tensorboard.log_all_train_errors(
                    metadata["edit_dist_error"],
                    metadata["closest_dist_error"],
                    metadata["stop_dist_error"])

            # Store it in the replay memory list
            batch_replay_items.append(
                ReplayMemoryItem(state,
                                 action_space.get_stop_action_index(),
                                 reward,
                                 log_prob=log_probabilities,
                                 volatile=volatile, goal=goal))

            AsynchronousSupervisedLearning.save_goal(
                batch_replay_items, data_point_ix, trajectory)

            # Perform update
            if not batch_replay_items:
                continue
            loss_val = learner.do_update(batch_replay_items)

            if tensorboard is None:
                continue

            cross_entropy = float(learner.cross_entropy.data[0])
            tensorboard.log(cross_entropy, loss_val, 0)

            # Entropy is reported per step (+1 for the stop action).
            entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
            tensorboard.log_scalar("entropy", entropy)
            tensorboard.log_scalar("total_reward", total_reward)
            tensorboard.log_scalar("Abs_objective_to_entropy_ratio",
                                   float(learner.ratio.data[0]))

            # Auxiliary quantities are logged only when the learner set them.
            if learner.action_prediction_loss is not None:
                learner.tensorboard.log_action_prediction_loss(
                    float(learner.action_prediction_loss.data[0]))
            if learner.temporal_autoencoder_loss is not None:
                tensorboard.log_temporal_autoencoder_loss(
                    float(learner.temporal_autoencoder_loss.data[0]))
            if learner.object_detection_loss is not None:
                tensorboard.log_object_detection_loss(
                    float(learner.object_detection_loss.data[0]))
            if learner.symbolic_language_prediction_loss is not None:
                tensorboard.log_scalar(
                    "sym_language_prediction_loss",
                    float(learner.symbolic_language_prediction_loss.data[0]))
            if learner.goal_prediction_loss is not None:
                tensorboard.log_scalar(
                    "goal_prediction_loss",
                    float(learner.goal_prediction_loss.data[0]))
            if learner.goal_prob is not None:
                tensorboard.log_scalar("goal_prob",
                                       float(learner.goal_prob.data[0]))
            if learner.mean_factor_entropy is not None:
                tensorboard.log_factor_entropy_loss(
                    float(learner.mean_factor_entropy.data[0]))

        # Save the model
        local_model.save_model(experiment + "/supervised_learning_" +
                               str(rank) + "_epoch_" + str(epoch))
        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Test on tuning data
            agent.test_goal_prediction(tune_dataset,
                                       tensorboard=tensorboard,
                                       logger=logger,
                                       pushover_logger=pushover_logger)
def main():
    """Run a non-learned baseline agent on one split of the blocks dataset.

    Creates the experiment folder and the master logger, loads and validates
    the blocks config and shared constants, logs them together with this
    script's source, builds the token -> index vocabulary, launches the
    blocks simulator and tests the selected baseline agent.

    The dataset split (``train``/``dev``/``test``) and the baseline
    (``stop``/``random``/``frequent``) are read from ``args.split`` and
    ``args.baseline``. ``args`` is not defined in this function; presumably
    it is a module-level parsed-arguments object (confirm against the rest
    of the file).

    Exceptions raised inside the dataset/server/agent section, including the
    ``AssertionError`` raised for an unsupported split or baseline, are
    printed with their traceback and not re-raised.
    """
    experiment_name = "test_block_baselines"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log(" STARING NEW EXPERIMENT ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        # Iterate lazily; there is no need to materialise every line first.
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary (token -> index). The file is read inside a
    # ``with`` block so the handle is closed deterministically.
    vocab = dict()
    with open("./Assets/vocab_both") as vocab_file:
        vocab_list = vocab_file.readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    # The unknown-word token takes the index right after the last listed one.
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Read the dataset
        if args.split == "train":
            test_data = DatasetParser.parse("trainset.json", config)
        elif args.split == "dev":
            test_data = DatasetParser.parse("devset.json", config)
        elif args.split == "test":
            test_data = DatasetParser.parse("testset.json", config)
        else:
            raise AssertionError("Unhandled dataset split %r. Only support train, dev and test." % args.split)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        if args.baseline == "stop":
            agent_type = Agent.STOP
        elif args.baseline == "random":
            agent_type = Agent.RANDOM_WALK
        elif args.baseline == "frequent":
            agent_type = Agent.MOST_FREQUENT
            # TODO compute most frequent action from the dataset
        else:
            raise AssertionError("Unhandled agent type %r. Only support stop, random and frequent." % args.baseline)

        agent = Agent(agent_type=agent_type,
                      server=server,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      constants=constants)

        agent.test(test_data)

    except Exception:
        # Top-level boundary: report the failure instead of crashing silently.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def main():
    """Run the human-performance test on a slice of one house's dev set.

    Creates the experiment folder and the master logger, loads the house
    config and the shared constants (overriding ``horizon`` to 40), logs
    them together with this script's source, builds the index -> token
    vocabulary, creates the models, launches the house simulator for
    ``house_id`` and calls ``TmpHouseAgent.test_human_performance`` on
    elements 2..19 of the parsed dev split.

    Exceptions raised while creating the model, server or agent (or while
    testing) are printed with their traceback and not re-raised.
    """
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "emnlp_camera_ready_test_human_performance"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # House whose simulator build, config and dataset are used below
    house_id = 3

    # Define log settings
    log_path = experiment + '/test_baseline_%d.log' % house_id
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        " STARING NEW EXPERIMENT ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        # Iterate lazily; there is no need to materialise every line first.
        for line in f:
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # Create vocabulary. Note the direction: index -> token. The file is
    # read inside a ``with`` block so the handle is closed deterministically.
    vocab = dict()
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[i] = token
    # The unknown-word token takes the index right after the last listed one.
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            data_filename + "/tokenized_house" + str(house_id) +
            "_discrete_dev.json", config)
        test_split = test_split[2:20]

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        # Shallow copy so the port is set without mutating ``config``.
        tmp_config = dict(config)
        tmp_config["port"] = port
        print("Client " + str(0) + " getting a validation set of size ",
              len(test_split))
        server = HouseServer(tmp_config, action_space, port)

        launch_k_unity_builds(
            [tmp_config["port"]],
            "./house_" + str(house_id) + "_elmer.x86_64",
            arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
            cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")
        tmp_agent.test_human_performance(test_split, vocab, master_logger)

    except Exception:
        # Top-level boundary: report the failure instead of crashing silently.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
def do_supervised_train(chaplot_baseline, shared_model, config, action_space, meta_data_util, args,
                        constants, train_dataset, tune_dataset, experiment, experiment_name, rank,
                        server, logger, model_type, contextual_bandit=False, use_pushover=False):
    """Train a local copy of the model on oracle trajectories, updating `shared_model`.

    Each iteration of the endless loop syncs `local_model` from `shared_model`, resets the
    environment to get a new datapoint, replays the oracle trajectory (followed by a stop
    action) while recording model outputs, computes the losses, back-propagates through
    `local_model`, hands the gradients to `shared_model` via
    `ChaplotBaselineWithAuxiliary.ensure_shared_grads`, and steps an Adam optimizer that was
    built over `shared_model.parameters()`.

    The function never returns normally: the `while True` loop only ends when an exception
    is raised, and that exception is printed (with traceback) and swallowed.

    Side effects visible in this body: `sys.stderr` is redirected to `sys.stdout`,
    `chaplot_baseline.shared_model` is replaced by the local model, and a simulator build is
    launched from a hard-coded absolute path.

    Args:
        chaplot_baseline: object passed to `Agent` as its model; its `shared_model`
            attribute is overwritten with the local model.
        shared_model: model whose parameters are optimized and whose state dict is copied
            into the local model at the start of every iteration.
        config: configuration mapping; `config["port"]` is read here.
        action_space: provides `get_action_index` and `get_stop_action_index`.
        meta_data_util: forwarded to `Agent`.
        args: forwarded to `model_type`; `args.gamma` is the discount factor.
        constants: forwarded to `Agent` and `NavDroneServerInterface`.
        train_dataset: forwarded to `NavDroneServerInterface`.
        tune_dataset: forwarded to `NavDroneServerInterface`.
        experiment: forwarded to `NavDroneServerInterface`.
        experiment_name: name given to the `Tensorboard` created by rank 0.
        rank: worker index; only rank 0 creates and writes to a tensorboard.
        server: server object; `initialize_server()` is called on it.
        logger: object with a `log(str)` method.
        model_type: callable invoked as `model_type(args, config=config)` to build the
            local model.
        contextual_bandit: not used anywhere in this body.
        use_pushover: forwarded to `NavDroneServerInterface`.
    """
    try:
        # Anything written to stderr from here on goes to stdout.
        sys.stderr = sys.stdout
        server.initialize_server()
        # Local Config Variables
        lstm_size = 256

        # Test policy
        test_policy = gp.get_argmax_action
        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server, model=chaplot_baseline, test_policy=test_policy,
                      action_space=action_space, meta_data_util=meta_data_util,
                      config=config, constants=constants)
        logger.log("Created Agent...")

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        chaplot_baseline.shared_model = local_model
        local_model.train()

        # Our Environment Interface
        env = NavDroneServerInterface(agent, local_model, experiment, config, constants, None,
                                      train_dataset, tune_dataset, rank, logger, use_pushover)
        logger.log("Created NavDroneServerInterface")

        # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
        # The optimizer owns the *shared* parameters; gradients are computed on the local
        # model and transferred with ensure_shared_grads before each step.
        optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
        p_losses = []
        v_losses = []

        # NOTE(review): hard-coded, user-specific absolute path to the simulator build.
        launch_k_unity_builds([
            config["port"]
        ], "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64"
        )
        done = True

        num_iters = 0
        global_id = 1

        while True:

            # Sync with the shared model
            local_model.load_state_dict(shared_model.state_dict())

            # Get a new datapoint
            (image, instr), _, _, metadata, data_point = env.reset()
            curr_instr, prev_instr, next_instr = instr
            curr_instruction_idx = np.array(curr_instr)
            prev_instruction_idx = np.array(prev_instr)
            next_instruction_idx = np.array(next_instr)

            image = torch.from_numpy(image).float()
            # Each instruction is reshaped into a single row (1 x length).
            curr_instruction_idx = torch.from_numpy(
                curr_instruction_idx).view(1, -1)
            prev_instruction_idx = torch.from_numpy(
                prev_instruction_idx).view(1, -1)
            next_instruction_idx = torch.from_numpy(
                next_instruction_idx).view(1, -1)

            episode_length = 0
            # Fresh zero LSTM state for every episode.
            # NOTE(review): .cuda() is called unconditionally here and below, although the
            # model itself is only moved to the GPU when CUDA is available.
            cx = Variable(torch.zeros(1, lstm_size).cuda())
            hx = Variable(torch.zeros(1, lstm_size).cuda())

            # The oracle trajectory is computed towards the last destination of the datapoint.
            goal_x, goal_z = data_point.get_destination_list()[-1]
            trajectory_str = get_oracle_trajectory(metadata, goal_x, goal_z, data_point)
            trajectory = [
                action_space.get_action_index(act_str)
                for act_str in trajectory_str
            ]
            # trajectory = data_point.get_trajectory()
            num_steps = len(trajectory) + 1  # 1 for stopping

            values = []
            log_probs = []
            rewards = []
            entropies = []
            cached_information = None

            #############################
            # Per-step records consumed by the auxiliary losses.
            lstm_rep = []
            image_rep = []
            actions = []
            goal_locations = []
            #############################

            for step in range(num_steps):
                episode_length += 1
                tx = Variable(
                    torch.from_numpy(np.array([episode_length
                                               ])).long().cuda())

                value, logit, (hx, cx), cached_information = local_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(curr_instruction_idx.cuda()),
                     Variable(prev_instruction_idx.cuda()),
                     Variable(next_instruction_idx.cuda()),
                     (tx, hx, cx)), cached_information)

                prob = F.softmax(logit, dim=1)
                log_prob = F.log_softmax(logit, dim=1)
                entropy = -(log_prob * prob).sum(1)
                entropies.append(entropy)

                # The executed action always comes from the oracle trajectory, with the
                # stop action appended as the final step; the policy is never sampled.
                if step == len(trajectory):
                    action = action_space.get_stop_action_index()
                else:
                    action = trajectory[step]
                action_var = torch.from_numpy(np.array([[action]]))

                ####################################
                lstm_rep.append(cached_information["lstm_rep"])
                image_rep.append(cached_information["image_rep"])
                actions.append(action_var)
                goal_location = ChaplotBaselineWithAuxiliary.get_goal_location(
                    metadata, data_point)
                goal_locations.append(goal_location)
                # ChaplotBaselineWithAuxiliary.save_visualized_image(image, goal_location, global_id)
                global_id += 1
                ####################################

                # Keep only the log-probability of the oracle action.
                log_prob = log_prob.gather(1, Variable(action_var.cuda()))

                (image, _), reward, done, _, metadata = env.step(action)
                image = torch.from_numpy(image).float()

                values.append(value)
                log_probs.append(log_prob)
                rewards.append(reward)

            assert done, "Should be done as all trajectories are fully executed and stop with 'stop' action."

            if rank == 0 and tensorboard is not None:
                # Log total reward and entropy
                tensorboard.log_scalar("Total_Reward", sum(rewards))
                mean_entropy = sum(entropies).data[0] / float(
                    max(episode_length, 1))
                tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                       mean_entropy)

            # The return after the final step is zero.
            R = torch.zeros(1, 1)
            values.append(Variable(R.cuda()))
            policy_loss = 0
            value_loss = 0
            R = Variable(R.cuda())

            # Entropy coefficient shrinks by 0.01 per epoch from 0.11 and is floored at 0.
            entropy_coeff = max(0.0, 0.11 - env.num_epochs * 0.01)
            # Walk the episode backwards to accumulate discounted returns.
            for i in reversed(range(len(rewards))):
                R = args.gamma * R + rewards[i]
                advantage = R - values[i]
                value_loss = value_loss + 0.5 * advantage.pow(2)

                # Negative log-likelihood of the oracle action minus the entropy bonus.
                policy_loss = policy_loss - \
                    log_probs[i] - entropy_coeff * entropies[i]

            # The auxiliary objectives below are disabled (set to None) except goal prediction.
            temporal_autoencoding_loss = None  # local_model.get_tae_loss(image_rep, actions)
            reward_prediction_loss = None  # local_model.get_reward_prediction_loss(lstm_rep, actions, rewards)
            alignment_loss, alignment_norm = None, None  # local_model.alignment_auxiliary(image_rep, cached_information["text_rep"])
            goal_prediction_loss = local_model.calc_goal_prediction_loss(
                image_rep, cached_information["text_rep"], goal_locations)

            optimizer.zero_grad()

            p_losses.append(policy_loss.data[0, 0])
            v_losses.append(value_loss.data[0, 0])

            # Report running averages once more than 1000 episodes have accumulated,
            # then start a fresh window.
            if len(p_losses) > 1000:
                num_iters += 1
                logger.log(" ".join([
                    # "Training thread: {}".format(rank),
                    "Num iters: {}K".format(num_iters),
                    "Avg policy loss: {}".format(np.mean(p_losses)),
                    "Avg value loss: {}".format(np.mean(v_losses))
                ]))
                p_losses = []
                v_losses = []

            if rank == 0 and tensorboard is not None:
                # Log the value loss and whichever auxiliary losses are enabled
                tensorboard.log_scalar("Value_Loss", float(value_loss.data))
                if temporal_autoencoding_loss is not None:
                    tensorboard.log_scalar(
                        "TAE_Loss", float(temporal_autoencoding_loss.data))
                if reward_prediction_loss is not None:
                    tensorboard.log_scalar(
                        "RP_Loss", float(reward_prediction_loss.data))
                if alignment_loss is not None:
                    tensorboard.log_scalar(
                        "Mean_Current_Segment_Alignment_Loss",
                        float(alignment_loss.data))
                    tensorboard.log_scalar("Alignment_Norm",
                                           float(alignment_norm.data))
                if goal_prediction_loss is not None:
                    # Logged value is the goal prediction loss divided by the episode's step count.
                    tensorboard.log_scalar(
                        "Goal_Prediction_Loss",
                        float(goal_prediction_loss.data) / float(num_steps))

            loss = policy_loss + 0.5 * value_loss
            if temporal_autoencoding_loss is not None:
                loss += 0.5 * temporal_autoencoding_loss
            if reward_prediction_loss is not None:
                loss += 0.5 * reward_prediction_loss
            if alignment_loss is not None:
                loss += 0.5 * alignment_loss
            if goal_prediction_loss is not None:
                loss += 20.0 * goal_prediction_loss
            # NOTE(review): this assignment discards the combined loss built above, so only
            # the goal prediction loss is back-propagated -- confirm this is intentional.
            loss = goal_prediction_loss
            loss.backward()
            # Clip the local model's gradient norm at 40 before sharing the gradients.
            torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)

            ChaplotBaselineWithAuxiliary.ensure_shared_grads(
                local_model, shared_model)
            optimizer.step()
    except Exception:
        # Any failure ends training: the traceback is printed and the exception is not re-raised.
        print("Exception")
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
logging.log(logging.DEBUG, "MODEL CREATED") # Create the agent logging.log(logging.DEBUG, "STARTING AGENT") agent = Agent(server=server, model=model, test_policy=test_policy, action_space=action_space, meta_data_util=meta_data_util, config=config, constants=constants) # create tensorboard tensorboard = Tensorboard("dummy") # Launch Unity Build launch_k_unity_builds([config["port"]], "./simulators/NavDroneLinuxBuild.x86_64") test_data = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json", config) agent.test(test_data, tensorboard) server.kill() except Exception: server.kill() exc_info = sys.exc_info() traceback.print_exception(*exc_info) # raise e
def do_train_(simulator_file, shared_model, config, action_space, meta_data_util, constants,
              train_dataset, tune_dataset, experiment, experiment_name, rank, server, logger,
              model_type, use_pushover=False):
    """Run contextual-bandit training of a local model against the simulator.

    For `constants["max_epochs"]` epochs, every datapoint in `train_dataset` is rolled out
    with actions sampled from `local_model`; the collected replay items are handed to an
    `AsynchronousContextualBandit` learner. After each epoch the local model is saved under
    `experiment` and, if `tune_dataset` is non-empty, the agent is tested on it.

    Args:
        simulator_file: simulator build passed to `launch_k_unity_builds`.
        shared_model: model whose state dict is copied into the local model before every
            datapoint; also given to the learner.
        config: configuration mapping; `config["port"]` is read here.
        action_space: provides `num_actions()` and `get_stop_action_index()`.
        meta_data_util: used to update state/tensorboard metadata.
        constants: mapping providing "max_epochs", "horizon" and "max_extra_horizon".
        train_dataset: sized iterable of datapoints to train on.
        tune_dataset: sized collection of datapoints to test on after each epoch.
        experiment: prefix of the path used when saving the model.
        experiment_name: name for the tensorboard / pushover logger.
        rank: worker index; only rank 0 creates a tensorboard.
        server: server object; `initialize_server()` is called on it.
        logger: object with a `log(str)` method.
        model_type: callable invoked as `model_type(config, constants)`.
        use_pushover: when true, a `PushoverLogger` is created and passed to `agent.test`.
    """
    # Bring up the simulator first, then the server that talks to it.
    launch_k_unity_builds([config["port"]], simulator_file)
    server.initialize_server()

    # Policy handed to the agent for test time.
    greedy_policy = gp.get_argmax_action

    # torch.manual_seed(args.seed + rank)

    # Only client 0 owns a tensorboard server.
    tensorboard = Tensorboard(experiment_name) if rank == 0 else None
    pushover_logger = PushoverLogger(experiment_name) if use_pushover else None

    # Local model used for rollouts.
    local_model = model_type(config, constants)
    # local_model.train()

    logger.log("STARTING AGENT")
    agent = Agent(
        server=server,
        model=local_model,
        test_policy=greedy_policy,
        action_space=action_space,
        meta_data_util=meta_data_util,
        config=config,
        constants=constants,
    )
    logger.log("Created Agent...")

    action_counts = [0] * action_space.num_actions()
    max_epochs = constants["max_epochs"]
    dataset_size = len(train_dataset)
    tune_dataset_size = len(tune_dataset)

    # The learner turns a batch of replay items into a loss and applies the update.
    learner = AsynchronousContextualBandit(shared_model, local_model, action_space,
                                           meta_data_util, config, constants, tensorboard)

    for epoch in range(1, max_epochs + 1):

        for example_ix, data_point in enumerate(train_dataset):

            # Start every episode from the current shared parameters.
            local_model.load_from_state_dict(shared_model.get_state_dict())

            if (example_ix + 1) % 100 == 0:
                logger.log("Done %d out of %d" % (example_ix, dataset_size))
                logger.log("Training data action counts %r" % action_counts)

            num_actions = 0
            max_num_actions = constants["horizon"] + constants["max_extra_horizon"]

            image, metadata = agent.server.reset_receive_feedback(data_point)

            state = AgentObservedState(
                instruction=data_point.instruction,
                config=config,
                constants=constants,
                start_image=image,
                previous_action=None,
                data_point=data_point,
            )
            meta_data_util.start_state_update_metadata(state, metadata)

            model_state = None
            episode_items = []
            total_reward = 0
            # Becomes True only when the policy itself samples the stop action.
            chose_stop = False

            while num_actions < max_num_actions:

                # Query the policy for the current state.
                policy_output = local_model.get_probs(state, model_state)
                log_probabilities, model_state, image_emb_seq, volatile = policy_output
                action_probs = list(torch.exp(log_probabilities.data))[0]

                # Draw an action from the predicted distribution.
                action = gp.sample_action_from_prob(action_probs)
                action_counts[action] += 1

                if action == action_space.get_stop_action_index():
                    chose_stop = True
                    break

                # Execute the action and observe the outcome.
                image, reward, metadata = agent.server.send_action_receive_feedback(action)

                # Remember the transition for the update.
                episode_items.append(
                    ReplayMemoryItem(state, action, reward,
                                     log_prob=log_probabilities, volatile=volatile))

                # Advance the observed state.
                state = state.update(image, action, data_point=data_point)
                meta_data_util.state_update_metadata(state, metadata)

                num_actions += 1
                total_reward += reward

            # The episode always ends by sending STOP to the server.
            image, reward, metadata = agent.server.halt_and_receive_feedback()
            total_reward += reward

            if tensorboard is not None:
                meta_data_util.state_update_metadata(tensorboard, metadata)

            # The stop transition is only recorded when the policy chose it itself.
            if chose_stop:
                episode_items.append(
                    ReplayMemoryItem(state, action_space.get_stop_action_index(), reward,
                                     log_prob=log_probabilities, volatile=volatile))

            # Nothing to learn from an episode without recorded transitions.
            if episode_items:
                loss_val = learner.do_update(episode_items)

                if tensorboard is not None:
                    entropy = float(learner.entropy.data[0]) / float(num_actions + 1)
                    tensorboard.log_scalar("loss", loss_val)
                    tensorboard.log_scalar("entropy", entropy)
                    tensorboard.log_scalar("total_reward", total_reward)

        # Checkpoint the local model at the end of every epoch.
        checkpoint_path = "".join(
            [experiment, "/contextual_bandit_", str(rank), "_epoch_", str(epoch)])
        local_model.save_model(checkpoint_path)

        logger.log("Training data action counts %r" % action_counts)

        if tune_dataset_size > 0:
            # Evaluate on the tuning data.
            agent.test(tune_dataset, tensorboard=tensorboard, logger=logger,
                       pushover_logger=pushover_logger)
def test_multiprocess(house_id, test_dataset, config, action_space, port, agent_type, meta_data_util,
                      constants, vocab, logger, pushover_logger=None):
    """Evaluate a simple baseline agent (stop / random / oracle / most-frequent) on a house.

    Launches the simulator build for `house_id`, starts a `HouseServer` on `port`, runs one
    episode per datapoint of `test_dataset` with the action rule selected by `agent_type`,
    and logs the aggregated results through `Agent.log`.

    An episode ends when the rule picks the stop action or after `constants["horizon"]`
    non-stop actions; it counts as completed when the final `"navigation-error"` reported
    by the server is at most 1.0.

    Args:
        house_id: identifier used to build the simulator file name and config path.
        test_dataset: sized iterable of datapoints exposing `get_trajectory()` and
            `instruction`.
        config: configuration mapping; `config["port"]` is the port the build is launched on.
        action_space: provides `num_actions()` and `get_stop_action_index()`.
        port: port handed to `HouseServer`.
        agent_type: one of `Agent.STOP`, `Agent.RANDOM_WALK`, `Agent.ORACLE`,
            `Agent.MOST_FREQUENT`.
        meta_data_util: object whose `log_results(metadata)` is called at the end of each
            episode.
        constants: mapping providing "horizon".
        vocab: mapping from token id to token string, used to print the instruction.
        logger: logger passed to `Agent.log`.
        pushover_logger: optional object with `log(str)`; receives the final feedback.

    Raises:
        AssertionError: if `agent_type` is not one of the supported types (raised when the
            first action has to be chosen).
    """
    # start the python client
    logger.log("In Testing...")
    launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                          arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                          cwd="./simulators/house/")
    logger.log("Launched Builds")

    # start the server
    server = HouseServer(config, action_space, port)
    server.initialize_server()
    server.clear_metadata()
    logger.log("Server Initialized...")

    max_num_actions = constants["horizon"]
    task_completion_accuracy = 0

    # Placeholder so the summary below still works when the dataset is empty.
    metadata = {"feedback": ""}
    action_counts = [0] * action_space.num_actions()
    stop_action = action_space.get_stop_action_index()

    for data_point in test_dataset:
        image, metadata = server.reset_receive_feedback(data_point)
        action_seq = data_point.get_trajectory()
        act_idx = 0
        num_actions = 0

        instruction_string = " ".join(
            [vocab[token_id] for token_id in data_point.instruction])
        Agent.log("Instruction is %r " % instruction_string, logger)

        while True:

            if agent_type == Agent.STOP:
                action = stop_action
            elif agent_type == Agent.RANDOM_WALK:
                actions = list(range(0, action_space.num_actions()))
                # actions.remove(action_space.get_stop_action_index())
                action = random.choice(actions)
            elif agent_type == Agent.ORACLE:
                # Follow the recorded trajectory, then stop once it is exhausted.
                if act_idx == len(action_seq):
                    action = stop_action
                else:
                    action = action_seq[act_idx]
                    act_idx += 1
            elif agent_type == Agent.MOST_FREQUENT:
                action = 0  # Assumes that most frequent action is the first action
            else:
                # str() keeps this an AssertionError even when agent_type is not a string;
                # plain concatenation would raise TypeError instead.
                raise AssertionError("Unknown type " + str(agent_type))

            if action == stop_action or num_actions >= max_num_actions:
                # Send the stop action and get feedback
                image, reward, metadata = server.halt_and_receive_feedback()
                action_counts[stop_action] += 1

                if metadata["navigation-error"] <= 1.0:
                    task_completion_accuracy += 1

                # Update the scores based on meta_data
                meta_data_util.log_results(metadata)
                Agent.log(metadata, logger)
                break
            else:
                # Send the action and get feedback
                image, reward, metadata = server.send_action_receive_feedback(action)
                action_counts[action] += 1
                num_actions += 1

    task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
        max(len(test_dataset), 1))

    # With an empty dataset `metadata` is still the placeholder, which has no aggregate
    # keys; .get() reports None instead of crashing with KeyError.
    mean_navigation_error = metadata.get("mean-navigation-error")
    mean_manipulation_accuracy = metadata.get("mean-manipulation-accuracy")

    Agent.log("House %r Overall test results:" % house_id, logger)
    Agent.log("House %r Test Data Size %r:" % (house_id, len(test_dataset)), logger)
    Agent.log("House %r Overall mean navigation error %r:" %
              (house_id, mean_navigation_error), logger)
    Agent.log("House %r Testing: Final Metadata: %r" % (house_id, metadata), logger)
    Agent.log("House %r Testing: Action Distribution: %r" % (house_id, action_counts), logger)
    Agent.log("House %r Testing: Manipulation Accuracy: %r " %
              (house_id, mean_manipulation_accuracy), logger)
    Agent.log("House %r Testing: Navigation Accuracy: %r " %
              (house_id, task_completion_accuracy), logger)
    # self.meta_data_util.log_results(metadata, logger)
    Agent.log("House %r Testing data action counts %r" % (house_id, action_counts), logger)
    if pushover_logger is not None:
        pushover_feedback = str(metadata["feedback"])
        pushover_logger.log(pushover_feedback)
logging.info("START SCRIPT CONTENTS") with open(__file__) as f: for line in f.readlines(): logging.info(">>> " + line.strip()) logging.info("END SCRIPT CONTENTS") act_space = ActionSpace(config["action_names"], config["stop_action"]) meta_data_util = MetaDataUtil() # Create the server logging.log(logging.DEBUG, "STARTING SERVER") server = HouseServer(config, act_space, ports[0]) logging.log(logging.DEBUG, "STARTED SERVER") # Launch the build launch_k_unity_builds([ports[0]], "./simulators/house_3_elmer.x86_64") # Launched the build server.connect() # Create the agent logging.log(logging.DEBUG, "STARTING AGENT") agent = Agent(Agent.ORACLE, server, act_space, meta_data_util) # Read the house dataset dev_dataset = DatasetParser.parse("data/house/dataset/house_3_dev.json", config) logging.info("Created test dataset of size %d ", len(dev_dataset)) # Test on this dataset agent.test(dev_dataset)
def main():
    """Set up a house-1 debugging session and hand datapoints to manual control forever.

    Steps performed: create the experiment folder and the master logger, load the config
    and constants JSON files, log them together with this script's contents, build the
    action space and the id-to-token vocabulary, create the model, load the first 50
    training datapoints of each house, start a `HouseServer` plus the simulator build, and
    finally cycle through the datapoints calling `TmpHouseAgent.debug_manual_control`.

    Everything from model creation onwards is a top-level boundary: any exception raised
    there is printed with its traceback and not re-raised.
    """
    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "tmp_house_1_debug_manual_control"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log(" STARING NEW EXPERIMENT ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log(" %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    # The vocabulary maps token id -> token; the id after the last listed token is $UNK$.
    # Read inside a `with` block so the file handle is closed deterministically.
    with open(data_filename + "/house_all_vocab.txt") as vocab_file:
        vocab_list = vocab_file.readlines()
    vocab = dict()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Houses to load
    house_ids = [1]  # [1,2,3]

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete.json", config)
            # Parsed but currently unused; kept so a missing dev file is still detected here.
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) + "_discrete_dev.json", config)
            # num_tune = int(len(all_train_data) * 0.1)
            # train_split.append(list(all_train_data[num_tune:]))
            # tune_split.append(list(all_train_data[:num_tune]))

            # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + "_v5_110.txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     datapoint_id, datapoint_type = line.split()
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type.strip()
            #
            # # Filter manipulation type
            # all_train_data = list(
            #     filter(lambda datapoint: datapoint_id_type[datapoint.get_id()] == "navigation", all_train_data))

            # Debug session: only the first 50 training datapoints are used, for both splits.
            all_train_data = all_train_data[0:50]
            train_split.append(all_train_data)
            tune_split.append(all_train_data)
            # train_split.append(all_train_data)
            # tune_split.append(all_dev_data)

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        # Shallow copy so the per-client port does not leak into the shared config.
        tmp_config = dict(config)
        tmp_config["port"] = port
        tmp_tune_split = tune_split[0]
        print("Client " + str(0) + " getting a validation set of size ", len(tmp_tune_split))
        server = HouseServer(tmp_config, action_space, port)

        house_id = house_ids[0]
        launch_k_unity_builds([tmp_config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")

        debug_data = train_split[0]
        index = 0
        while True:
            # The count is now interpolated; the original passed it as a second print
            # argument and emitted a literal "%r".
            print("Giving another data %r " % len(debug_data))
            # index = random.randint(0, len(train_split[0]) - 1)
            # The index is advanced before use, so the first datapoint shown is index 1.
            index = (index + 1) % len(debug_data)
            print("Dataset id is " + str(debug_data[index].get_id()))
            tmp_agent.debug_manual_control(debug_data[index], vocab)
            # tmp_agent.debug_tracking(train_split[0][index], vocab)

    except Exception:
        # Top-level boundary: report the failure and let main() return.
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
# Test policy test_policy = gp.get_argmax_action with open("data/nav_drone/config_localmoves_6000.json") as f: config = json.load(f) with open("data/shared/contextual_bandit_constants.json") as f: constants = json.load(f) if len(sys.argv) > 1: config["port"] = int(sys.argv[1]) setup_validator = NavDroneSetupValidator() setup_validator.validate(config, constants) ports = find_k_ports(1) config["port"] = ports[0] launch_k_unity_builds(ports, "simulators/NavDroneLinuxBuild.x86_64") # log core experiment details logging.info("CONFIG DETAILS") for k, v in sorted(config.items()): logging.info(" %s --- %r" % (k, v)) logging.info("CONSTANTS DETAILS") for k, v in sorted(constants.items()): logging.info(" %s --- %r" % (k, v)) logging.info("START SCRIPT CONTENTS") with open(__file__) as f: for line in f.readlines(): logging.info(">>> " + line.strip()) logging.info("END SCRIPT CONTENTS") action_space = ActionSpace(config["action_names"], config["stop_action"])