Example #1
    def do_test_(house_id, goal_prediction_model, navigation_model, action_type_model, config,
                 action_space, meta_data_util, constants, test_dataset,
                 experiment_name, rank, server, logger, vocab, goal_type, use_pushover=False):

        logger.log("In Testing...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        # Pushover notifications are not wired up in this example, so the
        # logger stays None regardless of use_pushover.
        pushover_logger = None

        # Create the Agent
        tmp_agent = HouseDecoupledPredictorNavigatorAgent(server=server,
                                                          goal_prediction_model=goal_prediction_model,
                                                          navigation_model=navigation_model,
                                                          action_type_model=action_type_model,
                                                          test_policy=test_policy,
                                                          action_space=action_space,
                                                          meta_data_util=meta_data_util,
                                                          config=config,
                                                          constants=constants)
        logger.log("Created Agent.")
        tune_dataset_size = len(test_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            # tmp_agent.test_single_step(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
            #                            logger=logger, pushover_logger=pushover_logger)
            # tmp_agent.test_multi_step(test_dataset, vocab, num_outer_loop_steps=10, num_inner_loop_steps=4,
            #                           goal_type=goal_type, tensorboard=tensorboard, logger=logger,
            #                           pushover_logger=pushover_logger)
            # tmp_agent.test_multi_step_action_types(test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
            #                                        logger=logger, pushover_logger=pushover_logger)
            tmp_agent.test_goal_distance(house_id, test_dataset, vocab, goal_type=goal_type, tensorboard=tensorboard,
                                         logger=logger, pushover_logger=pushover_logger)
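Almost every example below sets test_policy = gp.get_argmax_action. A minimal, self-contained sketch of what such an argmax test policy is assumed to do, namely pick the highest-probability action from the model's log-probability output (the tensor values here are made up):

import torch

# Greedy selection: take the action with the highest log-probability.
log_probabilities = torch.log_softmax(torch.tensor([[0.2, 1.5, -0.3, 0.1]]), dim=1)
greedy_action = int(torch.argmax(log_probabilities, dim=1).item())
print(greedy_action)  # prints 1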
Example #2
    def do_test(house_id, chaplot_baseline, config, action_space,
                meta_data_util, constants, test_dataset, experiment_name, rank,
                server, logger):

        # torch.manual_seed(args.seed + rank)

        # Launch the Unity Build
        launch_k_unity_builds([config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        # Initialize Server
        server.initialize_server()
        server.clear_metadata()
        logger.log("Server Initialized")

        # Test policy
        test_policy = gp.get_argmax_action

        # Create the Agent
        agent = TmpHouseAgent(server=server,
                              model=chaplot_baseline,
                              test_policy=test_policy,
                              action_space=action_space,
                              meta_data_util=meta_data_util,
                              config=config,
                              constants=constants)

        # Create tensorboard server
        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server...')
        else:
            tensorboard = None

        agent.test(test_dataset,
                   vocab=None,
                   tensorboard=tensorboard,
                   logger=logger)
Example #3
    def do_train_(house_id, shared_model, config, action_space, meta_data_util, constants,
                  train_dataset, tune_dataset, experiment, experiment_name, rank, server,
                  logger, model_type, vocab, use_pushover=False):

        logger.log("In Training...")
        launch_k_unity_builds([config["port"]], "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" + str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds.")
        server.initialize_server()
        logger.log("Server Initialized.")

        # Test policy
        test_policy = gp.get_argmax_action

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
            logger.log('Created Tensorboard Server.')
        else:
            tensorboard = None

        # Pushover notifications are not wired up in this example, so the
        # logger stays None regardless of use_pushover.
        pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent.")

        action_counts = [0] * action_space.num_actions()
        max_epochs = 100000 # constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        if tune_dataset_size > 0:
            # Test on tuning data
            tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                           logger=logger, pushover_logger=pushover_logger)

        # Create the learner to compute the loss
        learner = TmpAsynchronousContextualBandit(shared_model, local_model, action_space, meta_data_util,
                                                  config, constants, tensorboard)
        # TODO change 2 --- unity launch moved up
        learner.logger = logger

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %(data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" % action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"]
                max_num_actions += constants["max_extra_horizon"]

                image, metadata = tmp_agent.server.reset_receive_feedback(data_point)
                instruction = data_point.get_instruction()
                # instruction_str = TmpAsynchronousContextualBandit.convert_indices_to_text(instruction, vocab)
                # print("Instruction str is ", instruction_str)

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                state.goal = learner.get_goal(metadata)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # logger.log("Training: Meta Data %r " % metadata)

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(action)
                    # logger.log("Action is %r, Reward is %r Probability is %r " % (action, reward, probabilities))

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state, action, reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)
                    state.goal = learner.get_goal(metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback()
                total_reward += reward

                # Store it in the replay memory list
                if not forced_stop:
                    # logger.log("Action is Stop, Reward is %r Probability is %r " % (reward, probabilities))
                    replay_item = ReplayMemoryItem(state, action_space.get_stop_action_index(),
                                                   reward, log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        tensorboard.log_scalar("loss", loss_val)
                        entropy = float(learner.entropy.data[0])/float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar("Abs_objective_to_entropy_ratio", ratio)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("mean navigation error", metadata['mean-navigation-error'])

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(learner.symbolic_language_prediction_loss.data[0])
                            tensorboard.log_scalar("sym_language_prediction_loss", symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss", goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" + str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                tmp_agent.test(tune_dataset, vocab, tensorboard=tensorboard,
                               logger=logger, pushover_logger=pushover_logger)
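During the rollout above, actions are drawn from the policy distribution via gp.sample_action_from_prob. A small stand-alone sketch of that sampling step, assuming it draws an action index in proportion to the given probabilities (torch.multinomial behaves this way; the distribution below is made up):

import torch

# Convert the model's log-probabilities back to probabilities and sample one action.
log_probabilities = torch.log_softmax(torch.randn(1, 6), dim=1)
probabilities = torch.exp(log_probabilities)[0]
sampled_action = int(torch.multinomial(probabilities, num_samples=1).item())
print(sampled_action)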
Example #4
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  vocab,
                  use_pushover=False):

        print("In training...")

        launch_k_unity_builds([config["port"]],
                              "./simulators/house_3_elmer.x86_64")
        server.initialize_server()
        print("launched builds")

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            # pushover_logger = PushoverLogger(experiment_name)
            pushover_logger = None
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=test_policy,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = TmpSupervisedLearning(shared_model, local_model,
                                        action_space, meta_data_util, config,
                                        constants, tensorboard)
        # TODO change 2 --- unity launch moved up

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                image, metadata = tmp_agent.server.reset_receive_feedback(
                    data_point)
                # instruction = TmpSupervisedLearning.convert_text_to_indices(metadata["instruction"], vocab)
                instruction = data_point.get_instruction()

                # Pose and Orientation gone TODO change 3
                state = AgentObservedState(instruction=instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

                # trajectory = metadata["trajectory"]
                trajectory = data_point.get_trajectory()[0:300]

                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)

                    # Sample action from the probability
                    action_counts[action] += 1

                    # Send the action and get feedback
                    image, reward, metadata = tmp_agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    # Pose and orientation gone, TODO change 4
                    state = state.update(image, action, data_point=data_point)

                    total_reward += reward

                # Send final STOP action and get feedback
                # Sample action using the policy
                log_probabilities, model_state, image_emb_seq, state_feature = \
                    local_model.get_probs(state, model_state)
                image, reward, metadata = tmp_agent.server.halt_and_receive_feedback(
                )
                total_reward += reward

                # if tensorboard is not None:
                #     tensorboard.log_all_train_errors(
                #         metadata["edit_dist_error"], metadata["closest_dist_error"], metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities)
                batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # 32
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        # cross_entropy = float(learner.cross_entropy.data[0])
                        # tensorboard.log(cross_entropy, loss_val, 0)
                        num_actions = len(trajectory) + 1
                        tensorboard.log_scalar(
                            "loss_val", loss_val)  # /float(num_actions))
                        entropy = float(
                            learner.entropy.data[0])  # /float(num_actions)
                        tensorboard.log_scalar("entropy", entropy)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                print("Going for testing")
                tmp_agent.test(tune_dataset,
                               vocab,
                               tensorboard=tensorboard,
                               logger=logger,
                               pushover_logger=pushover_logger)
                print("Done testing")
Example #5
def main():

    experiment_name = "blocks_save_image-test"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    num_processes = 6

    try:
        # create tensorboard
        tensorboard = None  # Tensorboard(experiment_name)

        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        all_train_data = DatasetParser.parse("testset.json", config)
        tune_split = []  # all_train_data[:num_tune]
        train_split = list(all_train_data[:])

        master_logger.log("Created train dataset of size %d " % len(train_split))
        master_logger.log("Created tuning dataset of size %d " % len(tune_split))

        # Start the training thread(s)
        ports = find_k_ports(num_processes)
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = ports[0]

        server = BlocksServer(tmp_config, action_space)
        launch_k_unity_builds([ports[0]], "./simulators/blocks/retro_linux_build.x86_64")
        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the Agent
        tmp_agent = TmpBlockAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=config,
                                  constants=constants)
        tmp_agent.save_numpy_image(all_train_data, vocab, "test")

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
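The vocabulary above maps each token in ./Assets/vocab_both to an index and reserves the last index for $UNK$. A small sketch of how such a mapping can be applied to an instruction string (the toy vocabulary here is made up):

vocab = {"place": 0, "the": 1, "red": 2, "block": 3, "$UNK$": 4}

def instruction_to_indices(instruction, vocab):
    # Unknown tokens fall back to the $UNK$ index, mirroring the lookup above.
    unk = vocab["$UNK$"]
    return [vocab.get(token.lower(), unk) for token in instruction.split()]

print(instruction_to_indices("Place the red block on the table", vocab))  # [0, 1, 2, 3, 4, 1, 4]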
Example #6
    def do_train(chaplot_baseline,
                 shared_model,
                 config,
                 action_space,
                 meta_data_util,
                 args,
                 constants,
                 train_dataset,
                 tune_dataset,
                 experiment,
                 experiment_name,
                 rank,
                 server,
                 logger,
                 model_type,
                 contextual_bandit,
                 use_pushover=False):

        sys.stderr = sys.stdout
        server.initialize_server()
        # Local Config Variables
        lstm_size = 256

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=chaplot_baseline,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        chaplot_baseline.shared_model = local_model
        local_model.train()

        #  Our Environment Interface
        env = NavDroneServerInterface(agent, local_model, experiment, config,
                                      constants, None, train_dataset,
                                      tune_dataset, rank, logger, use_pushover)
        env.game_init()
        # logging.info("Contextual bandit is %r and horizon is %r", self.contextual_bandit, args.max_episode_length)
        logger.log("Created NavDroneServerInterface")

        # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
        optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
        p_losses = []
        v_losses = []

        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")
        (image, instr), _, _ = env.reset()
        curr_instr, prev_instr, next_instr = instr
        curr_instruction_idx = np.array(curr_instr)
        prev_instruction_idx = np.array(prev_instr)
        next_instruction_idx = np.array(next_instr)

        image = torch.from_numpy(image).float()
        curr_instruction_idx = torch.from_numpy(curr_instruction_idx).view(
            1, -1)
        prev_instruction_idx = torch.from_numpy(prev_instruction_idx).view(
            1, -1)
        next_instruction_idx = torch.from_numpy(next_instruction_idx).view(
            1, -1)

        done = True

        episode_length = 0
        num_iters = 0

        while True:
            # Sync with the shared model
            local_model.load_state_dict(shared_model.state_dict())
            if done:
                episode_length = 0
                cx = Variable(torch.zeros(1, lstm_size).cuda())
                hx = Variable(torch.zeros(1, lstm_size).cuda())

            else:
                # assert False, "Assertion put by Max and Dipendra. Code shouldn't reach here."
                cx = Variable(cx.data.cuda())
                hx = Variable(hx.data.cuda())

            values = []
            log_probs = []
            rewards = []
            entropies = []
            cached_information = None

            for step in range(args.num_steps):
                episode_length += 1
                tx = Variable(
                    torch.from_numpy(np.array([episode_length])).long().cuda())

                value, logit, (hx, cx), cached_information = local_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(curr_instruction_idx.cuda()),
                     Variable(prev_instruction_idx.cuda()),
                     Variable(next_instruction_idx.cuda()), (tx, hx, cx)),
                    cached_information)

                prob = F.softmax(logit, dim=1)
                log_prob = F.log_softmax(logit, dim=1)
                entropy = -(log_prob * prob).sum(1)
                entropies.append(entropy)

                action = prob.multinomial().data
                log_prob = log_prob.gather(1, Variable(action.cuda()))
                action = action.cpu().numpy()[0, 0]

                (image, _), reward, done, _ = env.step(action)

                # done = done or (episode_length >= self.args.max_episode_length)
                if not done and (episode_length >= args.max_episode_length):
                    # If the agent has not stopped on its own, force the stop action
                    _, _, done, _ = env.step(
                        env.client.agent.action_space.get_stop_action_index())
                    done = True

                if done:
                    (image, instr), _, _ = env.reset()
                    curr_instr, prev_instr, next_instr = instr
                    curr_instruction_idx = np.array(curr_instr)
                    prev_instruction_idx = np.array(prev_instr)
                    next_instruction_idx = np.array(next_instr)
                    curr_instruction_idx = torch.from_numpy(
                        curr_instruction_idx).view(1, -1)
                    prev_instruction_idx = torch.from_numpy(
                        prev_instruction_idx).view(1, -1)
                    next_instruction_idx = torch.from_numpy(
                        next_instruction_idx).view(1, -1)

                image = torch.from_numpy(image).float()

                values.append(value)
                log_probs.append(log_prob)
                rewards.append(reward)

                if done:
                    break

            if rank == 0 and tensorboard is not None:
                # Log total reward and entropy
                tensorboard.log_scalar("Total_Reward", sum(rewards))
                mean_entropy = sum(entropies).data[0] / float(
                    max(episode_length, 1))
                tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                       mean_entropy)

            R = torch.zeros(1, 1)
            if not done:
                tx = Variable(
                    torch.from_numpy(np.array([episode_length])).long().cuda())
                value, _, _, _ = local_model(
                    (Variable(image.unsqueeze(0).cuda()),
                     Variable(curr_instruction_idx.cuda()),
                     Variable(prev_instruction_idx.cuda()),
                     Variable(next_instruction_idx.cuda()), (tx, hx, cx)))
                R = value.data

            values.append(Variable(R.cuda()))
            policy_loss = 0
            value_loss = 0
            R = Variable(R.cuda())

            gae = torch.zeros(1, 1).cuda()
            for i in reversed(range(len(rewards))):
                R = args.gamma * R + rewards[i]
                advantage = R - values[i]
                value_loss = value_loss + 0.5 * advantage.pow(2)

                if contextual_bandit:
                    # Just focus on immediate reward
                    gae = torch.from_numpy(np.array([[rewards[i]]])).float()
                else:
                    # Generalized Advantage Estimation
                    delta_t = rewards[i] + args.gamma * \
                              values[i + 1].data - values[i].data
                    gae = gae * args.gamma * args.tau + delta_t

                policy_loss = policy_loss - \
                              log_probs[i] * Variable(gae.cuda()) - 0.02 * entropies[i]

            optimizer.zero_grad()

            p_losses.append(policy_loss.data[0, 0])
            v_losses.append(value_loss.data[0, 0])

            if len(p_losses) > 1000:
                num_iters += 1
                logger.log(" ".join([
                    # "Training thread: {}".format(rank),
                    "Num iters: {}K".format(num_iters),
                    "Avg policy loss: {}".format(np.mean(p_losses)),
                    "Avg value loss: {}".format(np.mean(v_losses))
                ]))
                p_losses = []
                v_losses = []

            (policy_loss + 0.5 * value_loss).backward()
            torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)

            ChaplotBaseline.ensure_shared_grads(local_model, shared_model)
            optimizer.step()
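The backward loop above mixes a contextual-bandit target (immediate reward only) with Generalized Advantage Estimation. A stand-alone numeric sketch of the same GAE recursion, with made-up rewards and value estimates (the final entry of values plays the role of the bootstrap value R):

gamma, tau = 0.99, 1.0
rewards = [0.0, 0.0, 1.0]
values = [0.1, 0.2, 0.5, 0.0]  # V(s_0), V(s_1), V(s_2) and the bootstrap value

gae = 0.0
for i in reversed(range(len(rewards))):
    # Same update as above: delta_t = r_t + gamma * V(s_{t+1}) - V(s_t)
    delta_t = rewards[i] + gamma * values[i + 1] - values[i]
    gae = gae * gamma * tau + delta_t
    print(i, round(delta_t, 4), round(gae, 4))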
Example #7
    def do_train(chaplot_baseline,
                 shared_model,
                 config,
                 action_space,
                 meta_data_util,
                 args,
                 constants,
                 train_dataset,
                 tune_dataset,
                 experiment,
                 experiment_name,
                 rank,
                 server,
                 logger,
                 model_type,
                 contextual_bandit=False,
                 use_pushover=False):

        try:
            sys.stderr = sys.stdout
            server.initialize_server()
            # Local Config Variables
            lstm_size = 256

            # Test policy
            test_policy = gp.get_argmax_action
            # torch.manual_seed(args.seed + rank)

            if rank == 0:  # client 0 creates a tensorboard server
                tensorboard = Tensorboard(experiment_name)
            else:
                tensorboard = None

            # Create the Agent
            logger.log("STARTING AGENT")
            agent = Agent(server=server,
                          model=chaplot_baseline,
                          test_policy=test_policy,
                          action_space=action_space,
                          meta_data_util=meta_data_util,
                          config=config,
                          constants=constants)
            logger.log("Created Agent...")

            # Create a local model for rollouts
            local_model = model_type(args, config=config)
            if torch.cuda.is_available():
                local_model.cuda()
            chaplot_baseline.shared_model = local_model
            local_model.train()

            #  Our Environment Interface
            env = NavDroneServerInterface(agent, local_model, experiment,
                                          config, constants, None,
                                          train_dataset, tune_dataset, rank,
                                          logger, use_pushover)
            logger.log("Created NavDroneServerInterface")

            # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
            optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
            p_losses = []
            v_losses = []

            launch_k_unity_builds([config["port"]],
                                  "./simulators/NavDroneLinuxBuild.x86_64")
            (image, instr), _, _, metadata, data_point = env.reset()
            curr_instr, prev_instr, next_instr = instr
            curr_instruction_idx = np.array(curr_instr)
            prev_instruction_idx = np.array(prev_instr)
            next_instruction_idx = np.array(next_instr)

            image = torch.from_numpy(image).float()
            curr_instruction_idx = torch.from_numpy(curr_instruction_idx).view(
                1, -1)
            prev_instruction_idx = torch.from_numpy(prev_instruction_idx).view(
                1, -1)
            next_instruction_idx = torch.from_numpy(next_instruction_idx).view(
                1, -1)

            done = True

            episode_length = 0
            num_iters = 0
            cx, hx = None, None
            global_id = 1

            while True:
                # Sync with the shared model
                local_model.load_state_dict(shared_model.state_dict())
                if done:
                    episode_length = 0
                    cx = Variable(torch.zeros(1, lstm_size).cuda())
                    hx = Variable(torch.zeros(1, lstm_size).cuda())

                else:
                    cx = Variable(cx.data.cuda())
                    hx = Variable(hx.data.cuda())

                values = []
                log_probs = []
                rewards = []
                entropies = []
                cached_information = None
                #############################
                lstm_rep = []
                image_rep = []
                actions = []
                goal_locations = []
                #############################

                for step in range(args.num_steps):
                    episode_length += 1
                    tx = Variable(
                        torch.from_numpy(np.array([episode_length
                                                   ])).long().cuda())

                    value, logit, (hx, cx), cached_information = local_model(
                        (Variable(image.unsqueeze(0).cuda()),
                         Variable(curr_instruction_idx.cuda()),
                         Variable(prev_instruction_idx.cuda()),
                         Variable(next_instruction_idx.cuda()), (tx, hx, cx)),
                        cached_information)

                    prob = F.softmax(logit, dim=1)
                    log_prob = F.log_softmax(logit, dim=1)
                    entropy = -(log_prob * prob).sum(1)
                    entropies.append(entropy)

                    action = prob.multinomial().data
                    ####################################
                    lstm_rep.append(cached_information["lstm_rep"])
                    image_rep.append(cached_information["image_rep"])
                    actions.append(action)
                    goal_location = ChaplotBaselineWithAuxiliary.get_goal_location(
                        metadata, data_point)
                    goal_locations.append(goal_location)
                    # ChaplotBaselineWithAuxiliary.save_visualized_image(image, goal_location, global_id)
                    global_id += 1
                    ####################################
                    log_prob = log_prob.gather(1, Variable(action.cuda()))
                    action = action.cpu().numpy()[0, 0]

                    (image, _), reward, done, _, metadata = env.step(action)

                    # done = done or (episode_length >= self.args.max_episode_length)
                    if not done and (episode_length >=
                                     args.max_episode_length):
                        # If the agent has not stopped on its own, force the stop action
                        _, _, done, _, metadata = env.step(
                            env.agent.action_space.get_stop_action_index())
                        done = True

                    if done:
                        (image,
                         instr), _, _, metadata, data_point = env.reset()
                        curr_instr, prev_instr, next_instr = instr
                        curr_instruction_idx = np.array(curr_instr)
                        prev_instruction_idx = np.array(prev_instr)
                        next_instruction_idx = np.array(next_instr)
                        curr_instruction_idx = torch.from_numpy(
                            curr_instruction_idx).view(1, -1)
                        prev_instruction_idx = torch.from_numpy(
                            prev_instruction_idx).view(1, -1)
                        next_instruction_idx = torch.from_numpy(
                            next_instruction_idx).view(1, -1)

                    image = torch.from_numpy(image).float()

                    values.append(value)
                    log_probs.append(log_prob)
                    rewards.append(reward)

                    if done:
                        break

                if rank == 0 and tensorboard is not None:
                    # Log total reward and entropy
                    tensorboard.log_scalar("Total_Reward", sum(rewards))
                    mean_entropy = sum(entropies).data[0] / float(
                        max(episode_length, 1))
                    tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                           mean_entropy)

                R = torch.zeros(1, 1)
                if not done:
                    tx = Variable(
                        torch.from_numpy(np.array([episode_length
                                                   ])).long().cuda())
                    value, _, _, _ = local_model(
                        (Variable(image.unsqueeze(0).cuda()),
                         Variable(curr_instruction_idx.cuda()),
                         Variable(prev_instruction_idx.cuda()),
                         Variable(next_instruction_idx.cuda()), (tx, hx, cx)))
                    R = value.data

                values.append(Variable(R.cuda()))
                policy_loss = 0
                value_loss = 0
                R = Variable(R.cuda())

                gae = torch.zeros(1, 1).cuda()
                entropy_coeff = max(0.0, 0.11 - env.num_epochs * 0.01)
                for i in reversed(range(len(rewards))):
                    R = args.gamma * R + rewards[i]
                    advantage = R - values[i]
                    value_loss = value_loss + 0.5 * advantage.pow(2)

                    if contextual_bandit:
                        # Just focus on immediate reward
                        gae = torch.from_numpy(np.array([[rewards[i]]
                                                         ])).float()
                    else:
                        # Generalized Advantage Estimation
                        delta_t = rewards[i] + args.gamma * \
                                  values[i + 1].data - values[i].data
                        gae = gae * args.gamma * args.tau + delta_t

                    policy_loss = policy_loss - \
                                  log_probs[i] * Variable(gae.cuda()) - entropy_coeff * entropies[i]

                temporal_autoencoding_loss = None  # local_model.get_tae_loss(image_rep, actions)
                reward_prediction_loss = None  # local_model.get_reward_prediction_loss(lstm_rep, actions, rewards)
                alignment_loss, alignment_norm = None, None  # local_model.alignment_auxiliary(image_rep, cached_information["text_rep"])
                goal_prediction_loss = local_model.calc_goal_prediction_loss(
                    image_rep, cached_information["text_rep"], goal_locations)
                optimizer.zero_grad()

                p_losses.append(policy_loss.data[0, 0])
                v_losses.append(value_loss.data[0, 0])

                if len(p_losses) > 1000:
                    num_iters += 1
                    logger.log(" ".join([
                        # "Training thread: {}".format(rank),
                        "Num iters: {}K".format(num_iters),
                        "Avg policy loss: {}".format(np.mean(p_losses)),
                        "Avg value loss: {}".format(np.mean(v_losses))
                    ]))
                    p_losses = []
                    v_losses = []

                if rank == 0 and tensorboard is not None:
                    if done:
                        tensorboard.log_scalar("train_dist_error",
                                               metadata["stop_dist_error"])
                        task_completion = 0
                        if metadata["stop_dist_error"] < 5.0:
                            task_completion = 1
                        tensorboard.log_scalar("train_task_completion",
                                               task_completion)
                    # Log total reward and entropy
                    tensorboard.log_scalar("Value_Loss",
                                           float(value_loss.data))
                    if temporal_autoencoding_loss is not None:
                        tensorboard.log_scalar(
                            "TAE_Loss", float(temporal_autoencoding_loss.data))
                    if reward_prediction_loss is not None:
                        tensorboard.log_scalar(
                            "RP_Loss", float(reward_prediction_loss.data))
                    if alignment_loss is not None:
                        tensorboard.log_scalar(
                            "Mean_Current_Segment_Alignment_Loss",
                            float(alignment_loss.data))
                        tensorboard.log_scalar("Alignment_Norm",
                                               float(alignment_norm.data))
                    if goal_prediction_loss is not None:
                        tensorboard.log_scalar(
                            "Goal_Prediction_Loss",
                            float(goal_prediction_loss.data) /
                            float(len(rewards)))

                loss = policy_loss + 0.5 * value_loss
                if temporal_autoencoding_loss is not None:
                    loss += 0.5 * temporal_autoencoding_loss
                if reward_prediction_loss is not None:
                    loss += 0.5 * reward_prediction_loss
                if alignment_loss is not None:
                    loss += 0.5 * alignment_loss
                if goal_prediction_loss is not None:
                    loss += 0.5 * goal_prediction_loss
                loss.backward()
                torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)
                ChaplotBaselineWithAuxiliary.ensure_shared_grads(
                    local_model, shared_model)
                optimizer.step()
        except Exception:
            print("Exception")
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
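This variant anneals the entropy bonus with entropy_coeff = max(0.0, 0.11 - env.num_epochs * 0.01): the coefficient decays linearly by 0.01 per epoch and is clamped at zero from roughly the eleventh epoch on. A tiny loop that prints the schedule:

# Print the entropy-coefficient schedule for the first 14 epochs.
for num_epochs in range(14):
    print(num_epochs, max(0.0, 0.11 - num_epochs * 0.01))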
Example #8
def main():

    experiment_name = "blocks_experiments"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    # Test policy
    test_policy = gp.get_argmax_action

    # Create tensorboard
    tensorboard = Tensorboard("Agent Test")

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = IncrementalModelEmnlp
        shared_model = model_type(config, constants)
        shared_model.load_saved_model(
            "./results/model-folder-name/model-file-name")

        # Read the dataset
        test_data = DatasetParser.parse("devset.json", config)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        agent = Agent(server=server,
                      model=shared_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)

        agent.test(test_data, tensorboard)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
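The test script reserves a simulator port with find_k_ports before launching the Unity build. A hedged sketch of what such a helper is assumed to do, written with only the standard library (the real project helper may behave differently):

import socket

def find_free_ports(k):
    # Bind to port 0 so the OS assigns free ports, record them, then release.
    sockets, ports = [], []
    for _ in range(k):
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        sockets.append(s)
        ports.append(s.getsockname()[1])
    for s in sockets:
        s.close()
    return ports

print(find_free_ports(1)[0])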
Example #9
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousAdvantageActorGAECritic(shared_model,
                                                      local_model,
                                                      action_space,
                                                      meta_data_util, config,
                                                      constants, tensorboard)

        # Launch unity
        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")

        for epoch in range(1, max_epochs + 1):

            learner.epoch = epoch
            task_completion_accuracy = 0
            mean_stop_dist_error = 0
            stop_dist_errors = []
            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"] + constants[
                    "max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)
                state.goal = GoalPrediction.get_goal_location(
                    metadata, data_point, learner.image_height,
                    learner.image_width)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    # Generate goal
                    if config["do_goal_prediction"]:
                        goal = learner.goal_prediction_calculator.get_goal_location(
                            metadata, data_point, learner.image_height,
                            learner.image_width)
                    else:
                        goal = None

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile,
                                                   goal=goal)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)
                    state.goal = GoalPrediction.get_goal_location(
                        metadata, data_point, learner.image_height,
                        learner.image_width)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = \
                    agent.server.halt_and_receive_feedback()
                total_reward += reward

                if metadata["stop_dist_error"] < 5.0:
                    task_completion_accuracy += 1
                mean_stop_dist_error += metadata["stop_dist_error"]
                stop_dist_errors.append(metadata["stop_dist_error"])

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        volatile=volatile,
                        goal=goal)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # could instead wait for a minibatch (e.g. 32 items)
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
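                        # Normalize per decision; the +1 accounts for the
                        # final STOP action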
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        v_value_loss_per_step = float(
                            learner.value_loss.data[0]) / float(num_actions +
                                                                1)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)
                        tensorboard.log_scalar("v_value_loss_per_step",
                                               v_value_loss_per_step)
                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)
            mean_stop_dist_error = mean_stop_dist_error / float(
                len(train_dataset))
            task_completion_accuracy = (task_completion_accuracy *
                                        100.0) / float(len(train_dataset))
            logger.log("Training: Mean stop distance error %r" %
                       mean_stop_dist_error)
            logger.log("Training: Task completion accuracy %r " %
                       task_completion_accuracy)
            bins = range(0, 80, 3)  # range of distance
            histogram, _ = np.histogram(stop_dist_errors, bins)
            logger.log("Histogram of train errors %r " % histogram)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)

    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  args,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):
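        # One asynchronous training worker: it keeps a local copy of the policy,
        # periodically syncs it with the shared model, and trains it with the
        # contextual-bandit learner created below.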

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(args, config=config)
        if torch.cuda.is_available():
            local_model.cuda()
        local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousContextualBandit(shared_model, local_model,
                                               action_space, meta_data_util,
                                               config, constants, tensorboard)

        # Launch unity
        launch_k_unity_builds(
            [config["port"]],
            "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64")

        for epoch in range(1, max_epochs + 1):

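            # Evaluate on the tuning set at the start of every epoch (the GAE
            # variant above evaluates at the end of each epoch instead).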
            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logging.info("Done %d out of %d", data_point_ix,
                                 dataset_size)
                    logging.info("Training data action counts %r",
                                 action_counts)

                num_actions = 0
                # max_num_actions = len(data_point.get_trajectory())
                # max_num_actions += self.constants["max_extra_horizon"]
                max_num_actions = constants["horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, state_feature = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
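                    # get_all_rewards derives the learner's auxiliary reward
                    # signals from the metadata; they are stored alongside the
                    # scalar reward in the replay item.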
                    rewards = learner.get_all_rewards(metadata)
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   all_rewards=rewards)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = \
                    agent.server.halt_and_receive_feedback()
                rewards = learner.get_all_rewards(metadata)
                total_reward += reward

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        all_rewards=rewards)
                    batch_replay_items.append(replay_item)

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
                        entropy = float(learner.entropy.data[0])
                        tensorboard.log_scalar("entropy", entropy)

                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))

            logging.info("Training data action counts %r", action_counts)
Example #11
0
    def do_train_(shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousSupervisedLearning(shared_model, local_model,
                                                 action_space, meta_data_util,
                                                 config, constants,
                                                 tensorboard)

        # Launch unity
        launch_k_unity_builds([config["port"]],
                              "./simulators/NavDroneLinuxBuild.x86_64")

        for epoch in range(1, max_epochs + 1):

            learner.epoch = epoch

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                # local_model.load_state_dict(shared_model.state_dict())
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                trajectory = data_point.get_trajectory()
                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                pose = int(metadata["y_angle"] / 15.0)
                position_orientation = (metadata["x_pos"], metadata["z_pos"],
                                        metadata["y_angle"])
                state = AgentObservedState(
                    instruction=data_point.instruction,
                    config=config,
                    constants=constants,
                    start_image=image,
                    previous_action=None,
                    pose=pose,
                    position_orientation=position_orientation,
                    data_point=data_point)

                model_state = None
                batch_replay_items = []
                total_reward = 0

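                # Supervised learning: step through the demonstration trajectory
                # instead of sampling actions from the current policy.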
                for action in trajectory:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)

                    action_counts[action] += 1

                    # Generate goal
                    if config["do_goal_prediction"]:
                        goal = learner.goal_prediction_calculator.get_goal_location(
                            metadata, data_point, 8, 8)
                        # learner.goal_prediction_calculator.save_attention_prob(image, volatile)
                        # time.sleep(5)
                    else:
                        goal = None

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile,
                                                   goal=goal)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    pose = int(metadata["y_angle"] / 15.0)
                    position_orientation = (metadata["x_pos"],
                                            metadata["z_pos"],
                                            metadata["y_angle"])
                    state = state.update(
                        image,
                        action,
                        pose=pose,
                        position_orientation=position_orientation,
                        data_point=data_point)

                    num_actions += 1
                    total_reward += reward

                # Final forward pass so the STOP action at the end of the
                # demonstration also gets a log-probability
                log_probabilities, model_state, image_emb_seq, volatile = \
                    local_model.get_probs(state, model_state)

                # Generate goal
                if config["do_goal_prediction"]:
                    goal = learner.goal_prediction_calculator.get_goal_location(
                        metadata, data_point, 8, 8)
                    # learner.goal_prediction_calculator.save_attention_prob(image, volatile)
                    # time.sleep(5)
                else:
                    goal = None

                # Send final STOP action and get feedback
                image, reward, metadata = \
                    agent.server.halt_and_receive_feedback()
                total_reward += reward

                if tensorboard is not None:
                    tensorboard.log_all_train_errors(
                        metadata["edit_dist_error"],
                        metadata["closest_dist_error"],
                        metadata["stop_dist_error"])

                # Store it in the replay memory list
                replay_item = ReplayMemoryItem(
                    state,
                    action_space.get_stop_action_index(),
                    reward,
                    log_prob=log_probabilities,
                    volatile=volatile,
                    goal=goal)
                batch_replay_items.append(replay_item)

                ###########################################
                AsynchronousSupervisedLearning.save_goal(
                    batch_replay_items, data_point_ix, trajectory)
                ###########################################

                # Update the scores based on meta_data
                # self.meta_data_util.log_results(metadata)

                # Perform update
                if len(batch_replay_items) > 0:  # could instead wait for a minibatch (e.g. 32 items)
                    loss_val = learner.do_update(batch_replay_items)
                    # self.action_prediction_loss_calculator.predict_action(batch_replay_items)
                    # del batch_replay_items[:]  # in place list clear

                    if tensorboard is not None:
                        cross_entropy = float(learner.cross_entropy.data[0])
                        tensorboard.log(cross_entropy, loss_val, 0)
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)

                        ratio = float(learner.ratio.data[0])
                        tensorboard.log_scalar(
                            "Abs_objective_to_entropy_ratio", ratio)

                        if learner.action_prediction_loss is not None:
                            action_prediction_loss = float(
                                learner.action_prediction_loss.data[0])
                            learner.tensorboard.log_action_prediction_loss(
                                action_prediction_loss)
                        if learner.temporal_autoencoder_loss is not None:
                            temporal_autoencoder_loss = float(
                                learner.temporal_autoencoder_loss.data[0])
                            tensorboard.log_temporal_autoencoder_loss(
                                temporal_autoencoder_loss)
                        if learner.object_detection_loss is not None:
                            object_detection_loss = float(
                                learner.object_detection_loss.data[0])
                            tensorboard.log_object_detection_loss(
                                object_detection_loss)
                        if learner.symbolic_language_prediction_loss is not None:
                            symbolic_language_prediction_loss = float(
                                learner.symbolic_language_prediction_loss.
                                data[0])
                            tensorboard.log_scalar(
                                "sym_language_prediction_loss",
                                symbolic_language_prediction_loss)
                        if learner.goal_prediction_loss is not None:
                            goal_prediction_loss = float(
                                learner.goal_prediction_loss.data[0])
                            tensorboard.log_scalar("goal_prediction_loss",
                                                   goal_prediction_loss)
                        if learner.goal_prob is not None:
                            goal_prob = float(learner.goal_prob.data[0])
                            tensorboard.log_scalar("goal_prob", goal_prob)
                        if learner.mean_factor_entropy is not None:
                            mean_factor_entropy = float(
                                learner.mean_factor_entropy.data[0])
                            tensorboard.log_factor_entropy_loss(
                                mean_factor_entropy)

            # Save the model
            local_model.save_model(experiment + "/supervised_learning_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test_goal_prediction(tune_dataset,
                                           tensorboard=tensorboard,
                                           logger=logger,
                                           pushover_logger=pushover_logger)
Example #12
0
def main():

    experiment_name = "test_block_baselines"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/test_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log("----------------------------------------------------------------")
    master_logger.log("                    STARING NEW EXPERIMENT                      ")
    master_logger.log("----------------------------------------------------------------")

    with open("data/blocks/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    print(json.dumps(config, indent=2))
    setup_validator = BlocksSetupValidator()
    setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    action_space = ActionSpace(config)
    meta_data_util = MetaDataUtil()

    # Create vocabulary
    vocab = dict()
    vocab_list = open("./Assets/vocab_both").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[token] = i
    vocab["$UNK$"] = len(vocab_list)
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Read the dataset
        if args.split == "train":
            test_data = DatasetParser.parse("trainset.json", config)
        elif args.split == "dev":
            test_data = DatasetParser.parse("devset.json", config)
        elif args.split == "test":
            test_data = DatasetParser.parse("testset.json", config)
        else:
            raise AssertionError("Unhandled dataset split %r. Only support train, dev and test." % args.split)
        master_logger.log("Created test dataset of size %d " % len(test_data))

        # Create server and launch a client
        simulator_file = "./simulators/blocks/retro_linux_build.x86_64"
        config["port"] = find_k_ports(1)[0]
        server = BlocksServer(config, action_space, vocab=vocab)

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Create the agent
        master_logger.log("CREATING AGENT")
        if args.baseline == "stop":
            agent_type = Agent.STOP
        elif args.baseline == "random":
            agent_type = Agent.RANDOM_WALK
        elif args.baseline == "frequent":
            agent_type = Agent.MOST_FREQUENT
            # TODO compute most frequent action from the dataset
        else:
            raise AssertionError("Unhandled agent type %r. Only support stop, random and frequent." % args.baseline)

        agent = Agent(agent_type=agent_type,
                      server=server,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      constants=constants)

        agent.test(test_data)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
Example #13
0
def main():

    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "emnlp_camera_ready_test_human_performance"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Number of processes
    house_id = 3

    # Define log settings
    log_path = experiment + '/test_baseline_%d.log' % house_id
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARING NEW EXPERIMENT                      ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
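    # This vocabulary maps token id -> token string (the reverse of the usual
    # token -> id map), so instruction token ids can be decoded back to words.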
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        vocab[i] = token
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        test_split = DatasetParser.parse(
            data_filename + "/tokenized_house" + str(house_id) +
            "_discrete_dev.json", config)
        test_split = test_split[2:20]

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = port
        print("Client " + str(0) + " getting a validation set of size ",
              len(test_split))
        server = HouseServer(tmp_config, action_space, port)

        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")
        tmp_agent.test_human_performance(test_split, vocab, master_logger)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
Example #14
0
    def do_supervised_train(chaplot_baseline,
                            shared_model,
                            config,
                            action_space,
                            meta_data_util,
                            args,
                            constants,
                            train_dataset,
                            tune_dataset,
                            experiment,
                            experiment_name,
                            rank,
                            server,
                            logger,
                            model_type,
                            contextual_bandit=False,
                            use_pushover=False):

        try:
            sys.stderr = sys.stdout
            server.initialize_server()
            # Local Config Variables
            lstm_size = 256

            # Test policy
            test_policy = gp.get_argmax_action
            # torch.manual_seed(args.seed + rank)

            if rank == 0:  # client 0 creates a tensorboard server
                tensorboard = Tensorboard(experiment_name)
            else:
                tensorboard = None

            # Create the Agent
            logger.log("STARTING AGENT")
            agent = Agent(server=server,
                          model=chaplot_baseline,
                          test_policy=test_policy,
                          action_space=action_space,
                          meta_data_util=meta_data_util,
                          config=config,
                          constants=constants)
            logger.log("Created Agent...")

            # Create a local model for rollouts
            local_model = model_type(args, config=config)
            if torch.cuda.is_available():
                local_model.cuda()
            chaplot_baseline.shared_model = local_model
            local_model.train()

            #  Our Environment Interface
            env = NavDroneServerInterface(agent, local_model, experiment,
                                          config, constants, None,
                                          train_dataset, tune_dataset, rank,
                                          logger, use_pushover)
            logger.log("Created NavDroneServerInterface")

            # optimizer = optim.SGD(self.shared_model.parameters(), lr=self.args.lr) --- changed Chaplot's optimizer
            optimizer = optim.Adam(shared_model.parameters(), lr=0.00025)
            p_losses = []
            v_losses = []

            launch_k_unity_builds(
                [config["port"]],
                "/home/dipendra/Downloads/NavDroneLinuxBuild/NavDroneLinuxBuild.x86_64")

            done = True

            num_iters = 0
            global_id = 1

            while True:

                # Sync with the shared model
                local_model.load_state_dict(shared_model.state_dict())

                # Get a new datapoint
                (image, instr), _, _, metadata, data_point = env.reset()
                curr_instr, prev_instr, next_instr = instr
                curr_instruction_idx = np.array(curr_instr)
                prev_instruction_idx = np.array(prev_instr)
                next_instruction_idx = np.array(next_instr)

                image = torch.from_numpy(image).float()
                curr_instruction_idx = torch.from_numpy(
                    curr_instruction_idx).view(1, -1)
                prev_instruction_idx = torch.from_numpy(
                    prev_instruction_idx).view(1, -1)
                next_instruction_idx = torch.from_numpy(
                    next_instruction_idx).view(1, -1)

                episode_length = 0
                cx = Variable(torch.zeros(1, lstm_size).cuda())
                hx = Variable(torch.zeros(1, lstm_size).cuda())

                goal_x, goal_z = data_point.get_destination_list()[-1]
                trajectory_str = get_oracle_trajectory(metadata, goal_x,
                                                       goal_z, data_point)
                trajectory = [
                    action_space.get_action_index(act_str)
                    for act_str in trajectory_str
                ]
                # trajectory = data_point.get_trajectory()
                num_steps = len(trajectory) + 1  # 1 for stopping

                values = []
                log_probs = []
                rewards = []
                entropies = []
                cached_information = None
                #############################
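                # Buffers for the auxiliary losses computed after the rollout
                # (goal prediction; the temporal-autoencoding and
                # reward-prediction terms are disabled further below).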
                lstm_rep = []
                image_rep = []
                actions = []
                goal_locations = []
                #############################

                for step in range(num_steps):
                    episode_length += 1
                    tx = Variable(
                        torch.from_numpy(np.array([episode_length
                                                   ])).long().cuda())

                    value, logit, (hx, cx), cached_information = local_model(
                        (Variable(image.unsqueeze(0).cuda()),
                         Variable(curr_instruction_idx.cuda()),
                         Variable(prev_instruction_idx.cuda()),
                         Variable(next_instruction_idx.cuda()), (tx, hx, cx)),
                        cached_information)

                    prob = F.softmax(logit, dim=1)
                    log_prob = F.log_softmax(logit, dim=1)
                    entropy = -(log_prob * prob).sum(1)
                    entropies.append(entropy)

                    if step == len(trajectory):
                        action = action_space.get_stop_action_index()
                    else:
                        action = trajectory[step]
                    action_var = torch.from_numpy(np.array([[action]]))

                    ####################################
                    lstm_rep.append(cached_information["lstm_rep"])
                    image_rep.append(cached_information["image_rep"])
                    actions.append(action_var)
                    goal_location = ChaplotBaselineWithAuxiliary.get_goal_location(
                        metadata, data_point)
                    goal_locations.append(goal_location)
                    # ChaplotBaselineWithAuxiliary.save_visualized_image(image, goal_location, global_id)
                    global_id += 1
                    ####################################
                    log_prob = log_prob.gather(1, Variable(action_var.cuda()))

                    (image, _), reward, done, _, metadata = env.step(action)
                    image = torch.from_numpy(image).float()

                    values.append(value)
                    log_probs.append(log_prob)
                    rewards.append(reward)

                assert done, "Should be done as all trajectories are fully executed and stop with 'stop' action."

                if rank == 0 and tensorboard is not None:
                    # Log total reward and entropy
                    tensorboard.log_scalar("Total_Reward", sum(rewards))
                    mean_entropy = sum(entropies).data[0] / float(
                        max(episode_length, 1))
                    tensorboard.log_scalar("Chaplot_Baseline_Entropy",
                                           mean_entropy)

                R = torch.zeros(1, 1)
                values.append(Variable(R.cuda()))
                policy_loss = 0
                value_loss = 0
                R = Variable(R.cuda())

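                # Anneal the entropy bonus linearly: 0.11 at epoch 0, reduced by
                # 0.01 per epoch and floored at 0.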
                entropy_coeff = max(0.0, 0.11 - env.num_epochs * 0.01)
                for i in reversed(range(len(rewards))):
                    R = args.gamma * R + rewards[i]
                    advantage = R - values[i]
                    value_loss = value_loss + 0.5 * advantage.pow(2)
                    policy_loss = policy_loss - \
                                  log_probs[i] - entropy_coeff * entropies[i]

                temporal_autoencoding_loss = None  # local_model.get_tae_loss(image_rep, actions)
                reward_prediction_loss = None  # local_model.get_reward_prediction_loss(lstm_rep, actions, rewards)
                alignment_loss, alignment_norm = None, None  # local_model.alignment_auxiliary(image_rep, cached_information["text_rep"])
                goal_prediction_loss = local_model.calc_goal_prediction_loss(
                    image_rep, cached_information["text_rep"], goal_locations)
                optimizer.zero_grad()

                p_losses.append(policy_loss.data[0, 0])
                v_losses.append(value_loss.data[0, 0])

                if len(p_losses) > 1000:
                    num_iters += 1
                    logger.log(" ".join([
                        # "Training thread: {}".format(rank),
                        "Num iters: {}K".format(num_iters),
                        "Avg policy loss: {}".format(np.mean(p_losses)),
                        "Avg value loss: {}".format(np.mean(v_losses))
                    ]))
                    p_losses = []
                    v_losses = []

                if rank == 0 and tensorboard is not None:
                    # Log total reward and entropy
                    tensorboard.log_scalar("Value_Loss",
                                           float(value_loss.data))
                    if temporal_autoencoding_loss is not None:
                        tensorboard.log_scalar(
                            "TAE_Loss", float(temporal_autoencoding_loss.data))
                    if reward_prediction_loss is not None:
                        tensorboard.log_scalar(
                            "RP_Loss", float(reward_prediction_loss.data))
                    if alignment_loss is not None:
                        tensorboard.log_scalar(
                            "Mean_Current_Segment_Alignment_Loss",
                            float(alignment_loss.data))
                        tensorboard.log_scalar("Alignment_Norm",
                                               float(alignment_norm.data))
                    if goal_prediction_loss is not None:
                        tensorboard.log_scalar(
                            "Goal_Prediction_Loss",
                            float(goal_prediction_loss.data) /
                            float(num_steps))

                loss = policy_loss + 0.5 * value_loss
                if temporal_autoencoding_loss is not None:
                    loss += 0.5 * temporal_autoencoding_loss
                if reward_prediction_loss is not None:
                    loss += 0.5 * reward_prediction_loss
                if alignment_loss is not None:
                    loss += 0.5 * alignment_loss
                if goal_prediction_loss is not None:
                    loss += 20.0 * goal_prediction_loss
                    loss = goal_prediction_loss
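                    # NOTE: the assignment above overrides the combined objective,
                    # so only the goal-prediction loss is actually optimized here.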

                loss.backward()
                torch.nn.utils.clip_grad_norm(local_model.parameters(), 40)
                ChaplotBaselineWithAuxiliary.ensure_shared_grads(
                    local_model, shared_model)
                optimizer.step()
        except Exception:
            print("Exception")
            exc_info = sys.exc_info()
            traceback.print_exception(*exc_info)
Example #15
0
    logging.log(logging.DEBUG, "MODEL CREATED")

    # Create the agent
    logging.log(logging.DEBUG, "STARTING AGENT")
    agent = Agent(server=server,
                  model=model,
                  test_policy=test_policy,
                  action_space=action_space,
                  meta_data_util=meta_data_util,
                  config=config,
                  constants=constants)

    # create tensorboard
    tensorboard = Tensorboard("dummy")

    # Launch Unity Build
    launch_k_unity_builds([config["port"]],
                          "./simulators/NavDroneLinuxBuild.x86_64")

    test_data = DatasetParser.parse("data/nav_drone/dev_annotations_6000.json",
                                    config)
    agent.test(test_data, tensorboard)

    server.kill()

except Exception:
    server.kill()
    exc_info = sys.exc_info()
    traceback.print_exception(*exc_info)
    # raise e
Example #16
0
    def do_train_(simulator_file,
                  shared_model,
                  config,
                  action_space,
                  meta_data_util,
                  constants,
                  train_dataset,
                  tune_dataset,
                  experiment,
                  experiment_name,
                  rank,
                  server,
                  logger,
                  model_type,
                  use_pushover=False):

        # Launch unity
        launch_k_unity_builds([config["port"]], simulator_file)
        server.initialize_server()

        # Test policy
        test_policy = gp.get_argmax_action

        # torch.manual_seed(args.seed + rank)

        if rank == 0:  # client 0 creates a tensorboard server
            tensorboard = Tensorboard(experiment_name)
        else:
            tensorboard = None

        if use_pushover:
            pushover_logger = PushoverLogger(experiment_name)
        else:
            pushover_logger = None

        # Create a local model for rollouts
        local_model = model_type(config, constants)
        # local_model.train()

        # Create the Agent
        logger.log("STARTING AGENT")
        agent = Agent(server=server,
                      model=local_model,
                      test_policy=test_policy,
                      action_space=action_space,
                      meta_data_util=meta_data_util,
                      config=config,
                      constants=constants)
        logger.log("Created Agent...")

        action_counts = [0] * action_space.num_actions()
        max_epochs = constants["max_epochs"]
        dataset_size = len(train_dataset)
        tune_dataset_size = len(tune_dataset)

        # Create the learner to compute the loss
        learner = AsynchronousContextualBandit(shared_model, local_model,
                                               action_space, meta_data_util,
                                               config, constants, tensorboard)

        for epoch in range(1, max_epochs + 1):

            for data_point_ix, data_point in enumerate(train_dataset):

                # Sync with the shared model
                local_model.load_from_state_dict(shared_model.get_state_dict())

                if (data_point_ix + 1) % 100 == 0:
                    logger.log("Done %d out of %d" %
                               (data_point_ix, dataset_size))
                    logger.log("Training data action counts %r" %
                               action_counts)

                num_actions = 0
                max_num_actions = constants["horizon"] + constants[
                    "max_extra_horizon"]

                image, metadata = agent.server.reset_receive_feedback(
                    data_point)

                state = AgentObservedState(instruction=data_point.instruction,
                                           config=config,
                                           constants=constants,
                                           start_image=image,
                                           previous_action=None,
                                           data_point=data_point)
                meta_data_util.start_state_update_metadata(state, metadata)
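                # In this variant the pose/position bookkeeping is delegated to
                # meta_data_util rather than being parsed from metadata inline.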

                model_state = None
                batch_replay_items = []
                total_reward = 0
                forced_stop = True

                while num_actions < max_num_actions:

                    # Sample action using the policy
                    log_probabilities, model_state, image_emb_seq, volatile = \
                        local_model.get_probs(state, model_state)
                    probabilities = list(torch.exp(log_probabilities.data))[0]

                    # Sample action from the probability
                    action = gp.sample_action_from_prob(probabilities)
                    action_counts[action] += 1

                    if action == action_space.get_stop_action_index():
                        forced_stop = False
                        break

                    # Send the action and get feedback
                    image, reward, metadata = agent.server.send_action_receive_feedback(
                        action)

                    # Store it in the replay memory list
                    replay_item = ReplayMemoryItem(state,
                                                   action,
                                                   reward,
                                                   log_prob=log_probabilities,
                                                   volatile=volatile)
                    batch_replay_items.append(replay_item)

                    # Update the agent state
                    state = state.update(image, action, data_point=data_point)
                    meta_data_util.state_update_metadata(state, metadata)

                    num_actions += 1
                    total_reward += reward

                # Send final STOP action and get feedback
                image, reward, metadata = \
                    agent.server.halt_and_receive_feedback()
                total_reward += reward

                if tensorboard is not None:
                    meta_data_util.state_update_metadata(tensorboard, metadata)

                # Store it in the replay memory list
                if not forced_stop:
                    replay_item = ReplayMemoryItem(
                        state,
                        action_space.get_stop_action_index(),
                        reward,
                        log_prob=log_probabilities,
                        volatile=volatile)
                    batch_replay_items.append(replay_item)

                # Perform update
                if len(batch_replay_items) > 0:
                    loss_val = learner.do_update(batch_replay_items)

                    if tensorboard is not None:
                        entropy = float(
                            learner.entropy.data[0]) / float(num_actions + 1)
                        tensorboard.log_scalar("loss", loss_val)
                        tensorboard.log_scalar("entropy", entropy)
                        tensorboard.log_scalar("total_reward", total_reward)

            # Save the model
            local_model.save_model(experiment + "/contextual_bandit_" +
                                   str(rank) + "_epoch_" + str(epoch))
            logger.log("Training data action counts %r" % action_counts)

            if tune_dataset_size > 0:
                # Test on tuning data
                agent.test(tune_dataset,
                           tensorboard=tensorboard,
                           logger=logger,
                           pushover_logger=pushover_logger)
Example #17
0
    def test_multiprocess(house_id,
                          test_dataset,
                          config,
                          action_space,
                          port,
                          agent_type,
                          meta_data_util,
                          constants,
                          vocab,
                          logger,
                          pushover_logger=None):

        # start the python client
        logger.log("In Testing...")
        launch_k_unity_builds([config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")
        logger.log("Launched Builds")

        # start the server
        server = HouseServer(config, action_space, port)
        server.initialize_server()
        server.clear_metadata()
        logger.log("Server Initialized...")

        max_num_actions = constants["horizon"]
        task_completion_accuracy = 0
        metadata = {"feedback": ""}
        action_counts = [0] * action_space.num_actions()

        for data_point in test_dataset:
            image, metadata = server.reset_receive_feedback(data_point)
            action_seq = data_point.get_trajectory()
            act_idx = 0
            num_actions = 0
            instruction_string = " ".join(
                [vocab[token_id] for token_id in data_point.instruction])
            Agent.log("Instruction is %r " % instruction_string, logger)
            while True:

                if agent_type == Agent.STOP:
                    action = action_space.get_stop_action_index()
                elif agent_type == Agent.RANDOM_WALK:
                    actions = list(range(0, action_space.num_actions()))
                    # actions.remove(action_space.get_stop_action_index())
                    action = random.choice(actions)
                elif agent_type == Agent.ORACLE:
                    if act_idx == len(action_seq):
                        action = action_space.get_stop_action_index()
                    else:
                        action = action_seq[act_idx]
                        act_idx += 1
                elif agent_type == Agent.MOST_FREQUENT:
                    action = 0  # Assumes that most frequent action is the first action
                else:
                    raise AssertionError("Unknown type " + agent_type)

                if (action == action_space.get_stop_action_index()
                        or num_actions >= max_num_actions):
                    # Send the final STOP action and get feedback
                    image, reward, metadata = server.halt_and_receive_feedback()
                    action_counts[action_space.get_stop_action_index()] += 1

                    if metadata["navigation-error"] <= 1.0:
                        task_completion_accuracy += 1

                    # Update the scores based on meta_data
                    meta_data_util.log_results(metadata)
                    Agent.log(metadata, logger)
                    break
                else:
                    # Send the action and get feedback
                    image, reward, metadata = server.send_action_receive_feedback(
                        action)
                    action_counts[action] += 1
                    num_actions += 1

        task_completion_accuracy = (task_completion_accuracy * 100.0) / float(
            max(len(test_dataset), 1))
        Agent.log("House %r Overall test results:" % house_id, logger)
        Agent.log(
            "House %r Test Data Size %r:" % (house_id, len(test_dataset)),
            logger)
        Agent.log(
            "House %r Overall mean navigation error %r:" %
            (house_id, metadata["mean-navigation-error"]), logger)
        Agent.log(
            "House %r Testing: Final Metadata: %r" % (house_id, metadata),
            logger)
        Agent.log(
            "House %r Testing: Action Distribution: %r" %
            (house_id, action_counts), logger)
        Agent.log(
            "House %r Testing: Manipulation Accuracy: %r " %
            (house_id, metadata["mean-manipulation-accuracy"]), logger)
        Agent.log(
            "House %r Testing: Navigation Accuracy: %r " %
            (house_id, task_completion_accuracy), logger)
        # self.meta_data_util.log_results(metadata, logger)
        Agent.log(
            "House %r Testing data action counts %r" %
            (house_id, action_counts), logger)
        if pushover_logger is not None:
            pushover_feedback = str(metadata["feedback"])
            pushover_logger.log(pushover_feedback)
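As a usage illustration, the snippet below sketches how test_multiprocess could be dispatched with one worker process per house, reusing the find_k_ports helper that appears elsewhere in these examples. The wrapper function, the per-house dataset list, and the choice of Agent.ORACLE are assumptions made for the sketch.

import torch.multiprocessing as mp


def run_oracle_tests_sketch(house_ids, datasets, config, action_space,
                            meta_data_util, constants, vocab, logger):
    # One process per house; each worker launches its own Unity build and server.
    ports = find_k_ports(len(house_ids))
    processes = []
    for house_id, dataset, port in zip(house_ids, datasets, ports):
        house_config = dict(config)
        house_config["port"] = port
        p = mp.Process(target=test_multiprocess,
                       args=(house_id, dataset, house_config, action_space, port,
                             Agent.ORACLE, meta_data_util, constants, vocab, logger))
        p.start()
        processes.append(p)
    for p in processes:
        p.join()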
Example #18
0
File: test_oracle.py  Project: lil-lab/ciff
logging.info("START SCRIPT CONTENTS")
with open(__file__) as f:
    for line in f.readlines():
        logging.info(">>> " + line.strip())
logging.info("END SCRIPT CONTENTS")

act_space = ActionSpace(config["action_names"], config["stop_action"])
meta_data_util = MetaDataUtil()

# Create the server
logging.log(logging.DEBUG, "STARTING SERVER")
server = HouseServer(config, act_space, ports[0])
logging.log(logging.DEBUG, "STARTED SERVER")

# Launch the build
launch_k_unity_builds([ports[0]], "./simulators/house_3_elmer.x86_64")
# Connect to the launched build
server.connect()

# Create the agent
logging.log(logging.DEBUG, "STARTING AGENT")
agent = Agent(Agent.ORACLE, server, act_space, meta_data_util)

# Read the house dataset
dev_dataset = DatasetParser.parse("data/house/dataset/house_3_dev.json",
                                  config)
logging.info("Created test dataset of size %d ", len(dev_dataset))

# Test on this dataset
agent.test(dev_dataset)
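The same script can be pointed at the other baseline agent types referenced in Example #17 by swapping the first constructor argument; a minimal sketch, assuming the Agent constructor accepts those type constants the same way it accepts Agent.ORACLE:

# A random-walk baseline using the same server and action space; run it in place of
# (not alongside) the oracle agent above, since both share one simulator instance.
random_agent = Agent(Agent.RANDOM_WALK, server, act_space, meta_data_util)
random_agent.test(dev_dataset)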
Example #19
0
def main():

    data_filename = "./simulators/house/AssetsHouse"
    experiment_name = "tmp_house_1_debug_manual_control"
    experiment = "./results/" + experiment_name
    print("EXPERIMENT NAME: ", experiment_name)

    # Create the experiment folder
    if not os.path.exists(experiment):
        os.makedirs(experiment)

    # Define log settings
    log_path = experiment + '/train_baseline.log'
    multiprocess_logging_manager = MultiprocessingLoggerManager(
        file_path=log_path, logging_level=logging.INFO)
    master_logger = multiprocess_logging_manager.get_logger("Master")
    master_logger.log(
        "----------------------------------------------------------------")
    master_logger.log(
        "                    STARTING NEW EXPERIMENT                     ")
    master_logger.log(
        "----------------------------------------------------------------")

    with open("data/house/config.json") as f:
        config = json.load(f)
    with open("data/shared/contextual_bandit_constants.json") as f:
        constants = json.load(f)
    constants['horizon'] = 40  # TODO HACK!!
    print(json.dumps(config, indent=2))

    # TODO: HouseSetupValidator()
    # setup_validator = BlocksSetupValidator()
    # setup_validator.validate(config, constants)

    # log core experiment details
    master_logger.log("CONFIG DETAILS")
    for k, v in sorted(config.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("CONSTANTS DETAILS")
    for k, v in sorted(constants.items()):
        master_logger.log("    %s --- %r" % (k, v))
    master_logger.log("START SCRIPT CONTENTS")
    with open(__file__) as f:
        for line in f.readlines():
            master_logger.log(">>> " + line.strip())
    master_logger.log("END SCRIPT CONTENTS")

    config["use_manipulation"] = True  # debug manipulation
    action_space = ActionSpace(config["action_names"], config["stop_action"],
                               config["use_manipulation"],
                               config["num_manipulation_row"],
                               config["num_manipulation_col"])
    meta_data_util = MetaDataUtil()

    # TODO: Create vocabulary
    vocab = dict()
    vocab_list = open(data_filename + "/house_all_vocab.txt").readlines()
    for i, tk in enumerate(vocab_list):
        token = tk.strip().lower()
        # vocab[token] = i
        vocab[i] = token
    # vocab["$UNK$"] = len(vocab_list)
    vocab[len(vocab_list)] = "$UNK$"
    config["vocab_size"] = len(vocab_list) + 1

    # Number of processes
    house_ids = [1]  # [1,2,3]
    num_processes = len(house_ids)

    try:
        # Create the model
        master_logger.log("CREATING MODEL")
        model_type = TmpHouseIncrementalModelChaplot
        shared_model = model_type(config, constants)
        # model.load_saved_model("./results/paragraph_chaplot_attention/chaplot_model_epoch_3")

        # make the shared model use share memory
        shared_model.share_memory()

        master_logger.log("MODEL CREATED")
        print("Created Model...")

        # Read the dataset
        tune_split, train_split = [], []
        for hid in house_ids:
            all_train_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete.json", config)
            all_dev_data = DatasetParser.parse(
                data_filename + "/tokenized_house" + str(hid) +
                "_discrete_dev.json", config)
            # num_tune = int(len(all_train_data) * 0.1)
            # train_split.append(list(all_train_data[num_tune:]))
            # tune_split.append(list(all_train_data[:num_tune]))

            # Extract type of the dataset
            # lines = open("./simulators/house/datapoint_type_house" + str(hid) + "_v5_110.txt").readlines()
            # datapoint_id_type = {}
            # for line in lines:
            #     datapoint_id, datapoint_type = line.split()
            #     datapoint_id_type[int(datapoint_id)] = datapoint_type.strip()
            #
            # # Filter manipulation type
            # all_train_data = list(
            #     filter(lambda datapoint: datapoint_id_type[datapoint.get_id()] == "navigation", all_train_data))

            all_train_data = all_train_data[0:50]
            train_split.append(all_train_data)
            tune_split.append(all_train_data)
            # train_split.append(all_train_data)
            # tune_split.append(all_dev_data)

        # Launch the server
        ports = find_k_ports(1)
        port = ports[0]
        tmp_config = {k: v for k, v in config.items()}
        tmp_config["port"] = port
        tmp_tune_split = tune_split[0]
        print("Client " + str(0) + " getting a validation set of size ",
              len(tmp_tune_split))
        server = HouseServer(tmp_config, action_space, port)

        house_id = house_ids[0]
        launch_k_unity_builds([tmp_config["port"]],
                              "./house_" + str(house_id) + "_elmer.x86_64",
                              arg_str="--config ./AssetsHouse/config" +
                              str(house_id) + ".json",
                              cwd="./simulators/house/")

        server.initialize_server()

        # Create a local model for rollouts
        local_model = model_type(tmp_config, constants)
        # local_model.train()

        # Create the Agent
        print("STARTING AGENT")
        tmp_agent = TmpHouseAgent(server=server,
                                  model=local_model,
                                  test_policy=None,
                                  action_space=action_space,
                                  meta_data_util=meta_data_util,
                                  config=tmp_config,
                                  constants=constants)
        print("Created Agent...")
        index = 0
        while True:
            print("Giving another data %r ", len(train_split[0]))
            # index = random.randint(0, len(train_split[0]) - 1)
            index = (index + 1) % len(train_split[0])
            print("Dataset id is " + str(train_split[0][index].get_id()))
            tmp_agent.debug_manual_control(train_split[0][index], vocab)
            # tmp_agent.debug_tracking(train_split[0][index], vocab)

    except Exception:
        exc_info = sys.exc_info()
        traceback.print_exception(*exc_info)
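The snippet does not include an entry point; a conventional guard such as the one below (an assumption, not shown in the original) would be needed to run it directly, and also keeps the multiprocessing logging setup from being re-executed when child processes import the module on spawn-based platforms.

if __name__ == "__main__":
    main()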
Example #20
0
# Test policy
test_policy = gp.get_argmax_action

with open("data/nav_drone/config_localmoves_6000.json") as f:
    config = json.load(f)
with open("data/shared/contextual_bandit_constants.json") as f:
    constants = json.load(f)
if len(sys.argv) > 1:
    config["port"] = int(sys.argv[1])
setup_validator = NavDroneSetupValidator()
setup_validator.validate(config, constants)

ports = find_k_ports(1)
config["port"] = ports[0]
launch_k_unity_builds(ports, "simulators/NavDroneLinuxBuild.x86_64")

# log core experiment details
logging.info("CONFIG DETAILS")
for k, v in sorted(config.items()):
    logging.info("    %s --- %r" % (k, v))
logging.info("CONSTANTS DETAILS")
for k, v in sorted(constants.items()):
    logging.info("    %s --- %r" % (k, v))
logging.info("START SCRIPT CONTENTS")
with open(__file__) as f:
    for line in f.readlines():
        logging.info(">>> " + line.strip())
logging.info("END SCRIPT CONTENTS")

action_space = ActionSpace(config["action_names"], config["stop_action"])