Example #1
def import_include_params(params):
    includes = params.get("@include") or []
    inherited_params = {}
    for include in includes:
        print("Including params:", include)
        incl_params = load_params(include)
        if incl_params is None:
            raise ValueError(f"No parameter file include found for: {include}")
        incl_params = import_include_params(incl_params)
        inherited_params = dict_merge(inherited_params, incl_params)

    # Overlay the defined parameters on top of the included parameters
    params = dict_merge(inherited_params, params)
    return params
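
All of the examples on this page exercise dict_merge (imported from utils.dict_tools in Example #2), but its definition is not shown here. As a rough reference, below is a minimal sketch of the semantics these call sites appear to rely on: a recursive overlay in which values from the second argument take precedence, as suggested by the "Overlay the defined parameters on top of the included parameters" comment above. This is an illustrative approximation, not the library's actual implementation.

def dict_merge(base, overlay):
    # Hypothetical sketch, not the original utils.dict_tools implementation.
    merged = dict(base or {})
    for key, value in (overlay or {}).items():
        if isinstance(merged.get(key), dict) and isinstance(value, dict):
            # Merge nested dictionaries recursively
            merged[key] = dict_merge(merged[key], value)
        else:
            # For everything else, the overlay value wins
            merged[key] = value
    return merged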
Example #2
    def get_activation_statistics(self, keys):
        stats = {}
        from utils.dict_tools import dict_merge
        for key in keys:
            t = self.tensor_store.get_inputs_batch(key)
            t_stats = self.calc_tensor_statistics(key, t)
            stats = dict_merge(stats, t_stats)
        return stats
Example #3
def train_rl():
    initialize_experiment()

    setup = get_current_parameters()["Setup"]
    params = get_current_parameters()["RL"]

    print("Loading data")
    train_envs, dev_envs, test_envs = get_restricted_env_id_lists()

    filename = "rl_" + setup["model"] + "_" + setup["run_name"]

    trainer = TrainerRL(params=dict_merge(setup, params))

    for start_epoch in range(10000):
        epfname = epoch_filename(filename, start_epoch)
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break

    if start_epoch > 0:
        print(f"CONTINUING RL TRAINING FROM EPOCH: {start_epoch}")
        load_pytorch_model(trainer.full_model,
                           epoch_filename(filename, start_epoch - 1))
        trainer.set_start_epoch(start_epoch)

    print("Beginning training...")
    best_dev_reward = -1e+10
    for epoch in range(start_epoch, 10000):
        train_reward, metrics = trainer.train_epoch(eval=False, envs="train")
        # TODO: Test on just a few dev environments
        # TODO: Take most likely or mean action when testing
        dev_reward, metrics = trainer.train_epoch(eval=True, envs="dev")
        #dev_reward, metrics = trainer.train_epoch(eval=True, envs="dev")
        dev_reward = 0

        #if dev_reward >= best_dev_reward:
        #    best_dev_reward = dev_reward
        #    save_pytorch_model(trainer.full_model, filename)
        #    print("Saved model in:", filename)

        print("Epoch", epoch, "train reward:", train_reward, "dev reward:",
              dev_reward)
        save_pytorch_model(trainer.full_model, epoch_filename(filename, epoch))
        if hasattr(trainer.full_model, "save"):
            trainer.full_model.save(epoch)
Example #4
def setup_parameter_namespaces():
    global_params = P.load_parameters(None)
    systems_params = global_params["MultipleEval"]["SystemsParams"]
    setup_overlay = global_params["MultipleEval"]["SetupOverlay"]

    # ----------------------------------------------------------------------------------------
    # Initialize systems
    # ----------------------------------------------------------------------------------------

    # Set up parameters for each system that will be evaluated
    system_namespaces = []
    for system_params in systems_params:
        namespace = system_params["param_namespace"]
        local_params = P.load_parameters(system_params["param_file"])
        local_params["Setup"] = dict_merge(local_params["Setup"],
                                           setup_overlay)
        P.set_parameters_for_namespace(local_params, namespace)
        system_namespaces.append(namespace)

    P.log_experiment_start(global_params["MultipleEval"]["run_name"])
    return global_params, system_namespaces
Example #5
def calc_rollout_metrics(rollouts):
    ev = DataEvalNL("", save_images=False, entire_trajectory=False)
    metrics = {}
    total_task_success = 0
    total_stop_success = 0
    coverages = []
    reward_keys = [k for k in rollouts[0][0].keys() if k.endswith("_reward")]
    rewards = {k: [] for k in reward_keys}

    for rollout in rollouts:
        success = ev.rollout_success(rollout[0]["env_id"],
                                     rollout[0]["set_idx"],
                                     rollout[0]["seg_idx"], rollout)

        # Take sum of rewards for each type of reward
        for k in reward_keys:
            v = sum([s[k] for s in rollout])
            rewards[k].append(v)

        visit_success = stop_success(rollout)
        total_stop_success += 1 if visit_success else 0
        total_task_success += 1 if success else 0

    task_success_rate = total_task_success / len(rollouts)
    visit_success_rate = total_stop_success / len(rollouts)

    metrics["task_success_rate"] = task_success_rate
    metrics["visit_success_rate"] = visit_success_rate

    rollout_lens = [len(rollout) for rollout in rollouts]
    metrics["mean_rollout_len"] = np.asarray(rollout_lens).mean()

    # Average each reward across rollouts
    rewards = {k: np.mean(np.asarray(l)) for k, l in rewards.items()}
    metrics = dict_merge(metrics, rewards)
    return metrics
Example #6
def train_rl_worker(sup_process_conn):
    P.initialize_experiment()
    setup = P.get_current_parameters()["Setup"]
    setup["trajectory_length"] = setup["rl_trajectory_length"]
    run_name = setup["run_name"]
    rlsup = P.get_current_parameters()["RLSUP"]
    params = P.get_current_parameters()["RL"]
    num_rl_epochs = params["num_epochs"]
    # These need to be distinguished between supervised and RL, because supervised trains on ALL envs while RL trains only on envs 6000-7000
    setup["env_range_start"] = setup["rl_env_range_start"]
    setup["env_range_end"] = setup["rl_env_range_end"]
    rl_device = rlsup.get("rl_device", "cuda:0")

    trainer = TrainerRL(params=dict_merge(setup, params),
                        save_rollouts_to_dataset=rl_dataset_name(run_name),
                        device=rl_device)

    # -------------------------------------------------------------------------------------
    # TODO: Continue (including figuring out how to initialize the Supervised Stage 1 real/sim/critic and the RL Stage 2 policy)
    start_rl_epoch = 0
    for start_rl_epoch in range(num_rl_epochs):
        epfname = epoch_rl_filename(run_name, start_rl_epoch, model="full")
        path = os.path.join(get_model_dir(), str(epfname) + ".pytorch")
        if not os.path.exists(path):
            break
    if start_rl_epoch > 0:
        print(f"RLP: CONTINUING RL TRAINING FROM EPOCH: {start_rl_epoch}")
        load_pytorch_model(
            trainer.full_model,
            epoch_rl_filename(run_name, start_rl_epoch - 1, model="full"))
        trainer.set_start_epoch(start_rl_epoch)
    # Wait for the supervised process to send its model
    sleep(2)

    # -------------------------------------------------------------------------------------

    print("RLP: Beginning training...")
    for rl_epoch in range(start_rl_epoch, num_rl_epochs):
        # Get the latest Stage 1 model. Halt on the first epoch so that we can actually initialize the Stage 1
        new_stage1_model_state_dict = receive_stage1_state(
            sup_process_conn, halt=(rl_epoch == start_rl_epoch))
        if new_stage1_model_state_dict:
            print(f"RLP: Re-loading latest Stage 1 model")
            trainer.reload_stage1(new_stage1_model_state_dict)

        train_reward, metrics = trainer.train_epoch(epoch_num=rl_epoch,
                                                    eval=False,
                                                    envs="train")
        dev_reward, metrics = trainer.train_epoch(epoch_num=rl_epoch,
                                                  eval=True,
                                                  envs="dev")

        print("RLP: RL Epoch", rl_epoch, "train reward:", train_reward,
              "dev reward:", dev_reward)
        save_pytorch_model(trainer.full_model,
                           epoch_rl_filename(run_name, rl_epoch, model="full"))
        save_pytorch_model(
            trainer.full_model.stage1_visitation_prediction,
            epoch_rl_filename(run_name, rl_epoch, model="stage1"))
        save_pytorch_model(
            trainer.full_model.stage2_action_generation,
            epoch_rl_filename(run_name, rl_epoch, model="stage2"))
Example #7
    def single_segment_rollout(self,
                               env_id,
                               set_idx,
                               seg_idx,
                               do_sample,
                               dagger_beta=0,
                               rl_rollout=True):
        instruction_sets = self.all_instructions[env_id][set_idx][
            'instructions']
        for instruction_set in instruction_sets:
            if instruction_set["seg_idx"] == seg_idx:
                break

        # TODO: Get rid of this idiocy:
        md.IS_ROLLOUT = True

        instruction_set = get_instruction_segment(
            env_id, set_idx, seg_idx, all_instr=self.all_instructions)

        self.env.set_environment(env_id,
                                 instruction_set=instruction_sets,
                                 fast=True)
        self.env.set_current_segment(seg_idx)

        self.policy.start_sequence()
        if hasattr(self.policy, "start_segment_rollout"):
            self.policy.start_segment_rollout(env_id, set_idx, seg_idx)
        if self.oracle:
            self.oracle.start_segment_rollout(env_id, set_idx, seg_idx)

        string_instruction, end_idx, start_idx = instruction_set[
            "instruction"], instruction_set["end_idx"], instruction_set[
                "start_idx"]
        token_instruction = self.tokenize_string(string_instruction)

        # TODO: Support oracle (including setCurrentSegment, and setting the path)
        rollout_sample = []

        # Reset the drone to the segment starting position:
        state = self.env.reset(seg_idx)

        first = True
        while True:
            action, rl_stuff = self.policy.get_action(state,
                                                      token_instruction,
                                                      sample=do_sample,
                                                      rl_rollout=rl_rollout)

            if self.oracle:
                ref_action, _ = self.oracle.get_action(state,
                                                       token_instruction)
                exec_action = self.choose_action(action, ref_action,
                                                 dagger_beta)
            else:
                ref_action = action
                exec_action = action

            next_state, extrinsic_reward, done, expired, oob = self.env.step(
                exec_action)

            # Calculate intrinsic reward (I don't like that this delays the loop)
            if hasattr(self.policy,
                       "calc_intrinsic_rewards") and not self.no_reward:
                intrinsic_rewards = self.policy.calc_intrinsic_rewards(
                    next_state, action, done, first)
            else:
                intrinsic_rewards = {"x": 0}
            intrinsic_reward = sum(intrinsic_rewards.values())

            sample = {
                "instruction": string_instruction,
                "ref_action": ref_action,
                "pol_action": action,
                "action": exec_action,
                "state": state,
                "extrinsic_reward": extrinsic_reward,
                "intrinsic_reward": intrinsic_reward - (1.0 if oob else 0.0),
                "full_reward": extrinsic_reward + intrinsic_reward,
                "done": done,
                "expired": expired,
                "env_id": env_id,
                "set_idx": set_idx,
                "seg_idx": seg_idx,
            }
            sample = dict_merge(sample, rl_stuff)
            if not self.no_reward:
                sample = dict_merge(sample, intrinsic_rewards)
            rollout_sample.append(sample)

            # Multiprocessing has stopped playing nice with PyTorch cuda. Move sample to cpu first.
            if rl_rollout:
                self.sample_to_cpu(sample)

            state = next_state
            first = False
            if done:
                #print(f"Done! Last action: {exec_action}")
                break

        md.IS_ROLLOUT = False
        # Add discounted returns
        return rollout_sample
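
The "# Add discounted returns" comment near the end of Example #7 refers to a step that is not shown in this excerpt. Below is a minimal sketch of one way it could be done, assuming a hypothetical discount factor gamma and storing the result under a hypothetical "return" key in each sample dict.

def add_discounted_returns(rollout_sample, gamma=0.99):
    # Hypothetical helper, not part of the original code.
    # Walk the rollout backwards, accumulating G_t = full_reward_t + gamma * G_{t+1}.
    running_return = 0.0
    for sample in reversed(rollout_sample):
        running_return = sample["full_reward"] + gamma * running_return
        sample["return"] = running_return
    return rollout_sample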
Example #8
    def __init__(self, run_name="", domain="sim"):

        super(PVN_Stage1_Bidomain_Original, self).__init__()
        self.model_name = "pvn_stage1"
        self.run_name = run_name
        self.domain = domain
        self.writer = LoggingSummaryWriter(
            log_dir=f"{get_logging_dir()}/runs/{run_name}/{self.domain}")
        #self.writer = DummySummaryWriter()

        self.root_params = get_current_parameters()["ModelPVN"]
        self.params = self.root_params["Stage1"]
        self.use_aux = self.root_params["UseAux"]
        self.aux_weights = self.root_params["AuxWeights"]

        if self.params.get("weight_override"):
            aux_weights_override_name = "AuxWeightsRealOverride" if self.domain == "real" else "AuxWeightsSimOverride"
            aux_weights_override = self.root_params.get(
                aux_weights_override_name)
            if aux_weights_override:
                print(
                    f"Overriding auxiliary weights for domain: {self.domain}")
                self.aux_weights = dict_merge(self.aux_weights,
                                              aux_weights_override)

        self.prof = SimpleProfiler(torch_sync=PROFILE, print=PROFILE)
        self.iter = nn.Parameter(torch.zeros(1), requires_grad=False)

        self.tensor_store = KeyTensorStore()
        self.losses = AuxiliaryLosses()

        # Auxiliary Objectives
        self.do_perturb_maps = self.params["perturb_maps"]
        print("Perturbing maps: ", self.do_perturb_maps)

        # Path-pred FPV model definition
        # --------------------------------------------------------------------------------------------------------------

        self.num_feature_channels = self.params[
            "feature_channels"]  # + params["relevance_channels"]
        self.num_map_channels = self.params["pathpred_in_channels"]

        self.img_to_features_w = FPVToGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            res_channels=self.params["resnet_channels"],
            map_channels=self.params["feature_channels"],
            img_w=self.params["img_w"],
            img_h=self.params["img_h"],
            cam_h_fov=self.params["cam_h_fov"],
            domain=domain,
            img_dbg=IMG_DBG)

        self.map_accumulator_w = LeakyIntegratorGlobalMap(
            source_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.add_init_pos_to_coverage = AddDroneInitPosToCoverage(
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"],
            map_size_px=self.params["local_map_size"])

        # Pre-process the accumulated map to do language grounding if necessary - in the world reference frame
        self.map_processor_grounding = LangFilterMapProcessor(
            embed_size=self.params["emb_size"],
            in_channels=self.params["feature_channels"],
            out_channels=self.params["relevance_channels"],
            spatial=False,
            cat_out=False)

        ratio_prior_channels = self.params["feature_channels"]

        # Process the global accumulated map
        self.path_predictor_lingunet = RatioPathPredictor(
            self.params["lingunet"],
            prior_channels_in=self.params["feature_channels"],
            posterior_channels_in=self.params["pathpred_in_channels"],
            dual_head=self.params["predict_confidence"],
            compute_prior=self.params["compute_prior"],
            use_prior=self.params["use_prior_only"],
            oob=self.params["clip_observability"])

        print("UNet Channels: " + str(self.num_map_channels))
        print("Feature Channels: " + str(self.num_feature_channels))

        # TODO:O Verify that config has the same randomization parameters (yaw, pos, etc)
        self.second_transform = self.do_perturb_maps or self.params[
            "predict_in_start_frame"]

        # Sentence Embedding
        self.sentence_embedding = SentenceEmbeddingSimple(
            self.params["word_emb_size"], self.params["emb_size"],
            self.params["emb_layers"], self.params["emb_dropout"])

        self.map_transform_local_to_local = MapTransformer(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_global_to_local = MapTransformer(
            source_map_size=self.params["global_map_size"],
            dest_map_size=self.params["local_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_local_to_global = MapTransformer(
            source_map_size=self.params["local_map_size"],
            dest_map_size=self.params["global_map_size"],
            world_size_px=self.params["world_size_px"],
            world_size_m=self.params["world_size_m"])

        self.map_transform_s_to_p = self.map_transform_local_to_local
        self.map_transform_w_to_s = self.map_transform_global_to_local
        self.map_transform_w_to_r = self.map_transform_global_to_local
        self.map_transform_r_to_s = self.map_transform_local_to_local
        self.map_transform_r_to_w = self.map_transform_local_to_global
        self.map_transform_p_to_w = self.map_transform_local_to_global
        self.map_transform_p_to_r = self.map_transform_local_to_local

        # Batch select is used to drop and forget semantic maps at those timesteps that we're not planning in
        self.batch_select = MapBatchSelect()
        # Since we only have path predictions for some timesteps (the ones not dropped above), we use this to fill
        # in the missing pieces by reorienting the past trajectory prediction into the frame of the current timestep
        self.map_batch_fill_missing = MapBatchFillMissing(
            self.params["local_map_size"], self.params["world_size_px"],
            self.params["world_size_m"])

        self.spatialsoftmax = SpatialSoftmax2d()
        self.visitation_softmax = VisitationSoftmax()

        #TODO:O Use CroppedMapToActionTriplet in Wrapper as Stage2
        # Auxiliary Objectives
        # --------------------------------------------------------------------------------------------------------------

        # We add all auxiliaries that are necessary. The first argument is the auxiliary name, followed by parameters,
        # followed by a variable number of input names. ModuleWithAuxiliaries will automatically collect these inputs,
        # which have been saved with keep_auxiliary_input() during execution.
        if self.use_aux["class_features"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("class_features",
                                 self.params["feature_channels"],
                                 self.params["num_landmarks"], 0,
                                 "fpv_features", "lm_pos_fpv", "lm_indices"))
        if self.use_aux["grounding_features"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("grounding_features",
                                 self.params["relevance_channels"], 2, 0,
                                 "fpv_features_g", "lm_pos_fpv",
                                 "lm_mentioned"))
        if self.use_aux["class_map"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("class_map", self.params["feature_channels"],
                                 self.params["num_landmarks"], 0, "S_W_select",
                                 "lm_pos_map_select", "lm_indices_select"))
        if self.use_aux["grounding_map"]:
            self.losses.add_auxiliary(
                ClassAuxiliary2D("grounding_map",
                                 self.params["relevance_channels"], 2, 0,
                                 "R_W_select", "lm_pos_map_select",
                                 "lm_mentioned_select"))
        # CoRL model uses alignment-model groundings
        if self.use_aux["lang"]:
            # One output for each landmark, 2 classes per output. This is for finetuning, so use the embedding that is going to be fine-tuned.
            self.losses.add_auxiliary(
                ClassAuxiliary("lang", self.params["emb_size"], 2,
                               self.params["num_landmarks"], "sentence_embed",
                               "lang_lm_mentioned"))

        if self.use_aux["regularize_map"]:
            self.losses.add_auxiliary(
                FeatureRegularizationAuxiliary2D("regularize_map", "l1",
                                                 "S_W_select"))

        lossfunc = self.params["path_loss_function"]
        if self.params["clip_observability"]:
            self.losses.add_auxiliary(
                PathAuxiliary2D("visitation_dist", lossfunc,
                                self.params["clip_observability"],
                                "log_v_dist_s_select",
                                "v_dist_s_ground_truth_select", "SM_S_select"))
        else:
            self.losses.add_auxiliary(
                PathAuxiliary2D("visitation_dist", lossfunc,
                                self.params["clip_observability"],
                                "log_v_dist_s_select",
                                "v_dist_s_ground_truth_select", "SM_S_select"))

        self.goal_good_criterion = GoalPredictionGoodCriterion(
            ok_distance=self.params["world_size_px"] * 0.1)
        self.goal_acc_meter = MovingAverageMeter(10)
        self.visible_goal_acc_meter = MovingAverageMeter(10)
        self.invisible_goal_acc_meter = MovingAverageMeter(10)
        self.visible_goal_frac_meter = MovingAverageMeter(10)

        self.losses.print_auxiliary_info()

        self.total_goals = 0
        self.correct_goals = 0

        self.env_id = None
        self.env_img = None
        self.seg_idx = None
        self.prev_instruction = None
        self.seq_step = 0

        self.should_save_path_overlays = False