def upload_confusion_matrices(task_id, search_name, search_key):
    search_dir = os.path.join(outputs_dir,
                              f'ray_results/task{task_id}/{search_name}')
    for exp_dir in os.listdir(search_dir):
        if not exp_dir.startswith(search_key):
            continue
        exp_dir_path = os.path.join(search_dir, exp_dir)
        comet_dir = ''
        for f in os.listdir(exp_dir_path):
            if f.startswith('comet'):
                comet_dir = f
        if not comet_dir:
            continue
        comet_exp_key = comet_dir.split('-')[1]
        print(comet_exp_key)
        comet_exp = ExistingExperiment(previous_experiment=comet_exp_key)
        comet_dir_path = os.path.join(exp_dir_path, comet_dir)
        p = re.compile(r'confusion-matrix-epoch-(.*)\.json')
        for cm_file in os.listdir(comet_dir_path):
            match = p.findall(cm_file)
            if not match:  # skip files that are not confusion-matrix dumps
                continue
            epoch = int(match[0])
            with open(os.path.join(comet_dir_path, cm_file)) as f:
                cm_json = json.load(f)
            comet_exp._log_asset_data(data=cm_json,
                                      file_name=cm_file,
                                      overwrite=True,
                                      epoch=epoch,
                                      asset_type='confusion-matrix')
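A minimal sketch of the new-then-resume round trip this example depends on (assumes a valid Comet API key is already configured in the environment; the project name and metric are illustrative):

from comet_ml import Experiment, ExistingExperiment

exp = Experiment(project_name="demo")   # original run
exp_key = exp.get_key()                 # persist this key somewhere durable
exp.end()

# Later, reopen the same experiment and attach more data to it.
resumed = ExistingExperiment(previous_experiment=exp_key)
resumed.log_metric("post_hoc_metric", 0.5)
resumed.end()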
Example #2
def setup_comet_ml(args, rank):
    # dummy init of experiment so it can be used without error
    # even if comet is disabled
    experiment = Experiment(api_key='dummy_key', disabled=True)
    if args.comet_api_key:
        # initiating comet
        if args.existing_exp_key:
            if rank == 0:
                print("STARTING FROM AND EXISTING EXPERIMENT")
            experiment = ExistingExperiment(
                api_key=args.comet_api_key, workspace=args.comet_workspace,
                project_name=args.project_name, previous_experiment=args.existing_exp_key,
                auto_output_logging="simple", auto_metric_logging=False, parse_args=False,
                disabled=args.disable_comet or rank != 0)
        else:
            if rank == 0:
                print("STARTING A NEW EXPERIMENT")
            experiment = Experiment(
                api_key=args.comet_api_key, workspace=args.comet_workspace,
                project_name=args.project_name, auto_output_logging="simple", auto_metric_logging=False,
                parse_args=False, disabled=args.disable_comet or rank != 0)

    experiment.log_asset('config.yaml')
    experiment.log_asset('config_prod.yaml')
    experiment.log_asset('config_prod_prime.yaml')

    return experiment
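A usage sketch for setup_comet_ml under assumed argument names (the fields mirror those referenced above; the values are hypothetical, and the three config*.yaml files logged by the function must exist on disk):

import argparse

args = argparse.Namespace(
    comet_api_key="YOUR_API_KEY",   # hypothetical placeholder
    comet_workspace="my-workspace",
    project_name="my-project",
    existing_exp_key=None,          # set to a key string to resume a run
    disable_comet=False,
)
experiment = setup_comet_ml(args, rank=0)
experiment.log_metric("loss", 0.1)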
Example #3
def load_experiment(path_to_yml_file):
    config = load_yaml(path_to_yml_file)
    api_key = os.getenv('COMET_API_KEY', None)
    exp = None

    if not config['info']['experiment_key']:
        if api_key:
            exp = Experiment(api_key=api_key,
                             project_name=config['info']['project_name'])
            exp_key = exp.get_key()
        else:
            exp_key = make_random_string(20)

        os.environ['EXPERIMENT_KEY'] = exp_key

        _env_variables = env_variables + ['EXPERIMENT_KEY']
        config = load_yaml(path_to_yml_file, _env_variables)
        config['info']['experiment_key'] = exp_key
        path_to_yml_file = save_experiment(config, exp)
    else:
        logging.info(
            f"Experiment is already set up @ {config['info']['output_folder']}!"
        )
        try:
            exp = ExistingExperiment(
                api_key=api_key,
                previous_experiment=config['info']['experiment_key'])
        except Exception:
            # Comet may be unreachable or the key invalid; fall back to exp=None.
            pass

    return config, exp, path_to_yml_file
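The create-or-resume dispatch above, reduced to its core (load_yaml, save_experiment, and the config layout are project-specific; this sketch assumes only a key string stored in an environment variable, which is a hypothetical storage location):

import os
from comet_ml import Experiment, ExistingExperiment

stored_key = os.getenv('EXPERIMENT_KEY')
if stored_key:
    exp = ExistingExperiment(previous_experiment=stored_key)
else:
    exp = Experiment(project_name='my-project')  # hypothetical project
    os.environ['EXPERIMENT_KEY'] = exp.get_key()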
Example #4
def create_comet_experiment(args):
    if args.resume:
        experiment_key = input("Enter Comet ML key of experiment to resume:")
        experiment = ExistingExperiment(api_key="jBFVYFo9VUsy0kb0lioKXfTmM",
                                        previous_experiment=experiment_key)
    elif args.no_comet:
        experiment = Experiment(api_key="jBFVYFo9VUsy0kb0lioKXfTmM",
                                project_name="test-runs")
    else:
        experiment = Experiment(api_key="jBFVYFo9VUsy0kb0lioKXfTmM",
                                project_name="fastdepth")
    return experiment
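The snippet above ships a hardcoded API key, which is risky to commit. A common alternative, sketched here under the assumption that the key is exported as COMET_API_KEY, is to read it from the environment:

import os
from comet_ml import Experiment

api_key = os.environ["COMET_API_KEY"]  # fails fast if the key is missing
experiment = Experiment(api_key=api_key, project_name="fastdepth")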
Example #5
    def __init__(self, disabled, is_existing=False, prev_exp_key=None):
        """
        Handles logging of experiment to comet and also persistence to local file system.
        Supports resumption of stopped experiments.
        """

        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError("Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(api_key=COMET_API_KEY,
                                                 workspace=COMET_WORKSPACE,
                                                 project_name=PROJECT_NAME,
                                                 disabled=disabled,
                                                 previous_experiment=prev_exp_key)
        self.disabled = disabled
        self.name = None
Example #6
    def __init__(
        self,
        batch_size: int,
        snapshot_dir: Optional[str] = None,
        snapshot_mode: str = "last",
        snapshot_gap: int = 1,
        exp_set: Optional[str] = None,
        use_print_exp: bool = False,
        saved_exp: Optional[str] = None,
        **kwargs,
    ):
        """
        :param kwargs: passed to comet's Experiment at init.
        """
        if use_print_exp:
            self.experiment = PrintExperiment()
        else:
            from comet_ml import Experiment, ExistingExperiment, OfflineExperiment

            if saved_exp:
                self.experiment = ExistingExperiment(
                    previous_experiment=saved_exp, **kwargs
                )
            else:
                try:
                    self.experiment = Experiment(**kwargs)
                except ValueError:  # no API key
                    log_dir = Path.home() / "logs"
                    log_dir.mkdir(exist_ok=True)
                    self.experiment = OfflineExperiment(offline_directory=str(log_dir))

        self.experiment.log_parameter("complete", False)
        if exp_set:
            self.experiment.log_parameter("exp_set", exp_set)
        if snapshot_dir:
            snapshot_dir = Path(snapshot_dir) / self.experiment.get_key()
        # log_traj_window (int): How many trajectories to hold in deque for computing performance statistics.
        self.log_traj_window = 100
        self._cum_metrics = {
            "n_unsafe_actions": 0,
            "constraint_used": 0,
            "cum_completed_trajs": 0,
            "logging_time": 0,
        }
        self._new_completed_trajs = 0
        self._last_step = 0
        self._start_time = self._last_time = time()
        self._last_snapshot_upload = 0
        self._snaphot_upload_time = 30 * 60

        super().__init__(batch_size, snapshot_dir, snapshot_mode, snapshot_gap)
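The constructor above falls back to an offline run when no API key is configured; isolated as a self-contained sketch (directory choice mirrors the snippet):

from pathlib import Path
from comet_ml import Experiment, OfflineExperiment

try:
    experiment = Experiment()  # raises ValueError when no API key is configured
except ValueError:
    log_dir = Path.home() / "logs"
    log_dir.mkdir(exist_ok=True)
    experiment = OfflineExperiment(offline_directory=str(log_dir))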
Example #7
def launch_parallel_experiment(gpu_rank, api_key, experiment_keys,
                               experiment_params, repo_path):
    torch.cuda.set_device(gpu_rank)
    param = Parameters()
    param.segment_dataset = False
    param.model_backup_destination = param.model_backup_destination + "/process_{}".format(
        gpu_rank)
    experiment = ExistingExperiment(
        api_key=api_key,
        previous_experiment=experiment_keys[gpu_rank],
        log_env_details=True,
        log_env_gpu=True,
        log_env_cpu=True)
    experiment.params = experiment_params[gpu_rank]
    repo = Repo(repo_path)

    with CometLogger(experiment, gpu_id=gpu_rank, print_to_comet_only=True):
        setup_comet_experiment(experiment, param, repo)
        CometLogger.print("-> loading experiments assets:")
        loss, model, optimizer, train_dataloader, valid_dataloader = load_experiment_assets(
            param)

        if param.train:
            CometLogger.print("~~ Launching the training ~~")
            CometLogger.print(
                "Sleeping {} secs to reduce chances of deadlock.".format(
                    gpu_rank))
            sleep(gpu_rank)

            launch_training(model, train_dataloader, valid_dataloader,
                            optimizer, loss, param)
        if param.test:
            CometLogger.print("~~ Testing the model ~~")
            launch_testing(model, param)

    del train_dataloader, valid_dataloader, model, optimizer, loss
    torch.cuda.empty_cache()
Example #8
    def __init__(self, comet_params, run_params=None, prev_exp_id=None):
        if prev_exp_id:  # resume a previous experiment
            # Pop api_key so the remaining items can be passed straight
            # through to ExistingExperiment as keyword arguments.
            api_key = comet_params.pop('api_key')
            self.experiment = ExistingExperiment(
                api_key=api_key,
                previous_experiment=prev_exp_id,
                **comet_params)
            print(
                f'In CometTracker: ExistingExperiment initialized with id: {prev_exp_id}'
            )

        else:  # new experiment
            self.experiment = Experiment(**comet_params)
            self.experiment.log_parameters(run_params)
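A minimal usage sketch for the tracker above (CometTracker is assumed to be the enclosing class; the dict contents and experiment key are hypothetical). Note that the resume branch pops 'api_key' from the dict it is given, so pass a copy if you intend to reuse it:

comet_params = {
    'api_key': 'YOUR_API_KEY',        # hypothetical placeholder
    'project_name': 'my-project',
    'workspace': 'my-workspace',
}
tracker = CometTracker(dict(comet_params), run_params={'lr': 1e-3})  # new run
tracker = CometTracker(dict(comet_params), prev_exp_id='abc123')     # resume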
Example #9
    def get_comet_logger(self):
        if not self.paras.load:
            comet_exp = Experiment(project_name=COMET_PROJECT_NAME,
                                   workspace=COMET_WORKSPACE,
                                   auto_output_logging=None,
                                   auto_metric_logging=None,
                                   display_summary=False)
            if self.paras.transfer:
                comet_exp.set_name(self.exp_name)
                comet_exp.add_tag(Path(self.ckpdir).parent.name)
                comet_exp.add_tag('transfer')
                comet_exp.add_tag(self.config['data']['corpus']['metas'][0])
            if self.paras.test:
                comet_exp.set_name(Path(self.paras.outdir).name)
                comet_exp.add_tag(Path(self.paras.config).parents[2].name)
                comet_exp.add_tag('test')
                comet_exp.add_tag(Path(self.paras.config).parent.stem)
                #comet_exp.add_tag(Path(self.paras.outdir).name)
            else:
                comet_exp.add_tag('train')

            for name, param in self.config.items():
                if isinstance(param, dict):
                    comet_exp.log_parameters(param, prefix=name)
                else:
                    comet_exp.log_parameter(name, param)
            comet_exp.log_other('seed', self.paras.seed)

            with open(Path(self.logdir, 'exp_key'), 'w') as f:
                print(comet_exp.get_key(), file=f)
        else:
            with open(Path(self.logdir, 'exp_key'), 'r') as f:
                exp_key = f.read().strip()
                comet_exp = ExistingExperiment(previous_experiment=exp_key,
                                               project_name=COMET_PROJECT_NAME,
                                               workspace=COMET_WORKSPACE,
                                               auto_output_logging=None,
                                               auto_metric_logging=None,
                                               display_summary=False)
        return comet_exp
Example #10
def log_hyperparameters_to_comet(clf, experiment):
    for i in range(len(clf.cv_results_["params"])):
        exp = Experiment(
            workspace="s0lvang",
            project_name="ideal-pancake-hyperparameter",
            api_key=globals.flags.comet_api_key,
        )
        exp.add_tag("hp_tuning")
        exp.add_tags(globals.comet_logger.get_tags())
        for k, v in clf.cv_results_.items():
            if k == "params":
                exp.log_parameters(v[i])
            else:
                exp.log_metric(k, v[i])
        exp.end()

    old_experiment = ExistingExperiment(
        api_key=globals.flags.comet_api_key,
        previous_experiment=experiment.get_key(),
    )
    globals.comet_logger = old_experiment
Example #11
    def _init_comet(self):
        """
        For more information on comet, see our doc/Getting Started
        """
        try:
            if self.comet_key:
                self.comet_exp = ExistingExperiment(
                    previous_experiment=self.comet_key)
            elif self.comet_workspace:
                # New experiment
                # Use trainset name as comet project name
                project_name = self.comet_project
                self.comet_exp = CometExperiment(
                    project_name=project_name,
                    workspace=self.comet_workspace,
                    log_code=False,
                    log_graph=True,
                    auto_param_logging=True,
                    auto_metric_logging=False,
                    parse_args=False,
                    auto_output_logging='native',
                    log_env_details=True,
                    log_env_gpu=True,
                    log_env_cpu=True,
                    log_env_host=False,
                    log_git_metadata=True,
                    log_git_patch=True,
                    display_summary=False)
                self.comet_exp.set_name(self.experiment_name)
                self.comet_exp.log_parameters(self.params)
                self.comet_key = self.comet_exp.get_key()
        except ConnectionError:
            self.logger.warning(
                "Could not connect to Comet.ml, metrics will not be logged "
                "online...")
            self.comet_exp = None
            self.comet_key = None
Example #12
                            ])
        logger = logging.getLogger()
        logger.info("Running new experiment")
        ex = Experiment(api_key=config.log.comet.api_key,
                        workspace=config.log.comet.workspace,
                        project_name=config.log.comet.project_name,
                        disabled=True,
                        auto_output_logging=None,
                        log_code=False)
        name = 'exp_{}'.format(config_id)
        config.general.exp_name = name
        ex.log_parameters(flatten_dictionary(config))
        ex.set_name(name)
        start(config, ex)
    else:
        logging.info("Resuming old experiment with id {}".format(exp_id))
        config = get_config(config_id=config_id)
        logger = logging.getLogger()
        ex = ExistingExperiment(
            api_key=config.log.comet.api_key,
            previous_experiment=exp_id,
            workspace=config.log.comet.workspace,
            project_name=config.log.comet.project_name,
            disabled=config.log.comet.disabled,
            auto_output_logging=None,
            log_code=False,
        )
        name = 'exp_{}'.format(config_id)
        config.general.exp_name = name
        resume(config, ex)
Example #13
def main():
    args = get_args()
    results_filename = f"logs/{args.env_name}-seed-{args.seed}-num-steps-{args.num_steps}-num-env-steps-{args.num_env_steps}-results.csv"
    save_path = os.path.join(args.save_dir, args.algo, str(args.seed))

    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    if args.cuda and torch.cuda.is_available() and args.cuda_deterministic:
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True

    log_dir = os.path.join(save_path, args.env_name)
    eval_log_dir = log_dir + "_eval"
    utils.cleanup_log_dir(log_dir)
    utils.cleanup_log_dir(eval_log_dir)

    torch.set_num_threads(1)
    device = torch.device("cuda:0" if args.cuda else "cpu")

    envs = make_vec_envs(args.env_name, args.seed, args.num_processes,
                         args.gamma, log_dir, device, False, args.custom_gym)

    if "Train" in args.env_name:
        test_envs = make_vec_envs(args.env_name.replace("Train", "Test"),
                                  args.seed, 1, args.gamma, log_dir, device,
                                  False, args.custom_gym)
    base = NaviBaseTemp
    obs_shape = envs.observation_space.shape

    save_j = 0
    try:
        os.makedirs(save_path)
    except FileExistsError:
        pass

    # Recover from job pre-emption
    try:
        actor_critic, ob_rms = torch.load(
            os.path.join(save_path, args.env_name + ".pt"), map_location='cpu')
        j = json.load(
            open(os.path.join(save_path, args.env_name + "-state.json"), 'r'))
        save_j = j['save_j']
        episode_total = j['episode_total']
        test_episode_total = j['test_episode_total']

        rollouts = pickle.load(
            open(os.path.join(save_path, args.env_name + "-rollout.pkl"),
                 'rb'))
        rollouts.to(device)
        obs = envs.reset()
        rollouts.obs[0].copy_(obs)
        rollouts.to(device)

        test_rollouts = pickle.load(
            open(os.path.join(save_path, args.env_name + "-test-rollout.pkl"),
                 'rb'))
        test_rollouts.to(device)
        test_obs = test_envs.reset()
        test_rollouts.obs[0].copy_(test_obs)
        test_rollouts.to(device)

        optimizer_state_dict = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-optim-state-dict.pkl"), 'rb'))
        episode_rewards = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-episode_rewards.pkl"), 'rb'))
        episode_length = pickle.load(
            open(
                os.path.join(save_path, args.env_name + "-episode_length.pkl"),
                'rb'))
        episode_success_rate = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-episode_success_rate.pkl"),
                'rb'))
        test_episode_rewards = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-test_episode_rewards.pkl"),
                'rb'))
        test_episode_length = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-test_episode_length.pkl"),
                'rb'))
        test_episode_success_rate = pickle.load(
            open(
                os.path.join(save_path,
                             args.env_name + "-test_episode_success_rate.pkl"),
                'rb'))

        if comet_loaded and len(args.comet) > 0:
            comet_credentials = args.comet.split("/")
            experiment = ExistingExperiment(api_key=comet_credentials[2],
                                            previous_experiment=j['comet_id'])
            for key, value in vars(args).items():
                experiment.log_parameter(key, value)
        else:
            experiment = None
            with open(results_filename, "a") as f:
                for key, value in vars(args).items():
                    f.write(f"{key}, {value}\n")
                f.close()

    except Exception:
        # create a new model
        actor_critic = Policy(
            obs_shape,
            envs.action_space,
            base_kwargs={'recurrent': args.recurrent_policy},
            base=base,
        )
        rollouts = RolloutStorage(args.num_steps, args.num_processes,
                                  envs.observation_space.shape,
                                  envs.action_space,
                                  actor_critic.recurrent_hidden_state_size)
        obs = envs.reset()
        rollouts.obs[0].copy_(obs)
        rollouts.to(device)

        test_rollouts = RolloutStorage(
            args.num_steps, 1, envs.observation_space.shape, envs.action_space,
            actor_critic.recurrent_hidden_state_size)
        if "Train" in args.env_name:
            test_obs = test_envs.reset()
            test_rollouts.obs[0].copy_(test_obs)
            test_rollouts.to(device)

        episode_rewards = deque(maxlen=10)
        episode_length = deque(maxlen=10)
        episode_success_rate = deque(maxlen=100)
        episode_total = 0

        test_episode_rewards = deque(maxlen=10)
        test_episode_length = deque(maxlen=10)
        test_episode_success_rate = deque(maxlen=100)
        test_episode_total = 0

        if comet_loaded and len(args.comet) > 0:
            comet_credentials = args.comet.split("/")
            experiment = Experiment(api_key=comet_credentials[2],
                                    project_name=comet_credentials[1],
                                    workspace=comet_credentials[0])
            for key, value in vars(args).items():
                experiment.log_parameter(key, value)
        else:
            experiment = None
            with open(results_filename, "w+") as f:
                for key, value in vars(args).items():
                    f.write(f"{key}, {value}\n")
                f.close()

    actor_critic.to(device)

    if args.algo == 'ppo':
        agent = algo.PPO(actor_critic,
                         args.clip_param,
                         args.ppo_epoch,
                         args.num_mini_batch,
                         args.value_loss_coef,
                         args.entropy_coef,
                         lr=args.lr,
                         eps=args.eps,
                         max_grad_norm=args.max_grad_norm)
    elif args.algo == 'random':
        agent = algo.RANDOM_AGENT(actor_critic, args.value_loss_coef,
                                  args.entropy_coef)

        actor_critic = RandomPolicy(
            obs_shape,
            envs.action_space,
            base_kwargs={'recurrent': args.recurrent_policy},
            base=base,
        )
    try:
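        # optimizer_state_dict only exists when resuming from pre-emption;
        # on a fresh run the resulting NameError is swallowed below.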
        agent.optimizer.load_state_dict(optimizer_state_dict)
    except Exception:
        pass

    start = time.time()
    num_updates = int(
        args.num_env_steps) // args.num_steps // args.num_processes
    for j in range(save_j, num_updates):  # resume from the last saved update
        if args.use_linear_lr_decay:
            # decrease learning rate linearly
            utils.update_linear_schedule(agent.optimizer, j, num_updates,
                                         args.lr)

        print("args.num_steps: " + str(args.num_steps))
        for step in range(args.num_steps):
            # Sample actions
            with torch.no_grad():
                value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                    rollouts.obs[step], rollouts.recurrent_hidden_states[step],
                    rollouts.masks[step])

            # Observe reward and next obs
            obs, reward, done, infos = envs.step(action)
            for idx, info in enumerate(infos):
                if 'episode' in info.keys():
                    episode_rewards.append(info['episode']['r'])
                    episode_length.append(info['episode']['l'])
                    if "Explorer" not in args.env_name:
                        episode_success_rate.append(
                            info['was_successful_trajectory'])
                    episode_total += 1

            # If done then clean the history of observations.
            masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                       for done_ in done])
            bad_masks = torch.FloatTensor(
                [[0.0] if 'bad_transition' in info.keys() else [1.0]
                 for info in infos])
            rollouts.insert(obs, recurrent_hidden_states, action,
                            action_log_prob, value, reward, masks, bad_masks)

        with torch.no_grad():
            next_value = actor_critic.get_value(
                rollouts.obs[-1], rollouts.recurrent_hidden_states[-1],
                rollouts.masks[-1]).detach()

        rollouts.compute_returns(next_value, args.use_gae, args.gamma,
                                 args.gae_lambda, args.use_proper_time_limits)

        value_loss, action_loss, dist_entropy = agent.update(rollouts)

        rollouts.after_update()

        # Run on test
        if "Train" in args.env_name:
            for step in range(args.num_steps):
                # Sample actions
                with torch.no_grad():
                    value, action, action_log_prob, recurrent_hidden_states = actor_critic.act(
                        test_rollouts.obs[step],
                        test_rollouts.recurrent_hidden_states[step],
                        test_rollouts.masks[step])

                    # Observe reward and next obs
                    obs, reward, done, infos = test_envs.step(action)
                    for idx, info in enumerate(infos):
                        if 'episode' in info.keys():
                            test_episode_rewards.append(info['episode']['r'])
                            test_episode_length.append(info['episode']['l'])
                            test_episode_success_rate.append(
                                info['was_successful_trajectory'])
                            test_episode_total += 1

        # save for every interval-th episode or for the last epoch
        if (j % args.save_interval == 0 or j == num_updates -
                1) and args.save_dir != "" and j > args.save_after:
            if args.save_multiple:
                torch.save([
                    actor_critic,
                    getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
                ], os.path.join(save_path,
                                str(j) + "-" + args.env_name + ".pt"))
            else:
                torch.save([
                    actor_critic,
                    getattr(utils.get_vec_normalize(envs), 'ob_rms', None)
                ], os.path.join(save_path, args.env_name + ".pt"))
                json.dump(
                    {
                        'save_j': j,
                        'episode_total': episode_total,
                        'test_episode_total': test_episode_total,
                        # experiment is None when Comet logging is disabled
                        'comet_id': experiment.id if experiment else None
                    },
                    open(
                        os.path.join(save_path, args.env_name + "-state.json"),
                        'w+'))
                pickle.dump(
                    agent.optimizer.state_dict(),
                    open(
                        os.path.join(save_path,
                                     args.env_name + "-optim-state-dict.pkl"),
                        'wb+'))
                pickle.dump(
                    rollouts,
                    open(
                        os.path.join(save_path,
                                     args.env_name + "-rollout.pkl"), 'wb+'))
                pickle.dump(
                    test_rollouts,
                    open(
                        os.path.join(save_path,
                                     args.env_name + "-test-rollout.pkl"),
                        'wb+'))
                pickle.dump(
                    episode_rewards,
                    open(
                        os.path.join(save_path,
                                     args.env_name + "-episode_rewards.pkl"),
                        'wb+'))
                pickle.dump(
                    episode_length,
                    open(
                        os.path.join(save_path,
                                     args.env_name + "-episode_length.pkl"),
                        'wb+'))
                pickle.dump(
                    episode_success_rate,
                    open(
                        os.path.join(
                            save_path,
                            args.env_name + "-episode_success_rate.pkl"),
                        'wb+'))
                pickle.dump(
                    test_episode_rewards,
                    open(
                        os.path.join(
                            save_path,
                            args.env_name + "-test_episode_rewards.pkl"),
                        'wb+'))
                pickle.dump(
                    test_episode_length,
                    open(
                        os.path.join(
                            save_path,
                            args.env_name + "-test_episode_length.pkl"),
                        'wb+'))
                pickle.dump(
                    test_episode_success_rate,
                    open(
                        os.path.join(
                            save_path,
                            args.env_name + "-test_episode_success_rate.pkl"),
                        'wb+'))

        if j % args.log_interval == 0 and len(episode_rewards) > 1:
            total_num_steps = (j + 1) * args.num_processes * args.num_steps
            end = time.time()
            if experiment is not None:
                experiment.log_metric("Reward Mean",
                                      np.mean(episode_rewards),
                                      step=total_num_steps)
                experiment.log_metric("Reward Min",
                                      np.min(episode_rewards),
                                      step=total_num_steps)
                experiment.log_metric("Reward Max",
                                      np.max(episode_rewards),
                                      step=total_num_steps)
                experiment.log_metric("Episode Length Mean ",
                                      np.mean(episode_length),
                                      step=total_num_steps)
                experiment.log_metric("Episode Length Min",
                                      np.min(episode_length),
                                      step=total_num_steps)
                experiment.log_metric("Episode Length Max",
                                      np.max(episode_length),
                                      step=total_num_steps)
                experiment.log_metric("# Trajectories (Total)",
                                      j,
                                      step=total_num_steps)
                if "Explorer" not in args.env_name:
                    experiment.log_metric("Episodic Success Rate",
                                          np.mean(episode_success_rate),
                                          step=total_num_steps)
            else:
                with open(results_filename, "a") as f:
                    f.write(
                        f"Reward Mean, {np.mean(episode_rewards)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"Reward Min, {np.min(episode_rewards)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"Reward Max, {np.max(episode_rewards)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"Episode Length Mean, {np.mean(episode_length)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"Episode Length Min, {np.min(episode_length)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"Episode Length Max, {np.max(episode_length)}, {total_num_steps}\n"
                    )
                    f.write(
                        f"# Trajectories (Total), {j}, {total_num_steps}\n")
                    if "Explorer" not in args.env_name:
                        f.write(
                            f"Episodic Success Rate, {np.mean(episode_success_rate)}, {total_num_steps}\n"
                        )
            print(
                "Updates {}, num timesteps {}, FPS {} \n Last {} training episodes: "
                "mean/median reward {:.1f}/{:.1f}, min/max reward {:.1f}/{:.1f}, "
                "dist entropy {:.3f}, value loss {:.3f}, action loss {:.3f}\n"
                .format(j, total_num_steps,
                        int(total_num_steps / (end - start)),
                        len(episode_rewards), np.mean(episode_rewards),
                        np.median(episode_rewards), np.min(episode_rewards),
                        np.max(episode_rewards), dist_entropy, value_loss,
                        action_loss))

            # Test Generalization
            if "Train" in args.env_name and j % args.log_interval == 0 and len(
                    test_episode_rewards) > 1:
                masks = torch.FloatTensor([[0.0] if done_ else [1.0]
                                           for done_ in done])
                bad_masks = torch.FloatTensor(
                    [[0.0] if 'bad_transition' in info.keys() else [1.0]
                     for info in infos])
                test_rollouts.insert(obs, recurrent_hidden_states, action,
                                     action_log_prob, value, reward, masks,
                                     bad_masks)

                with torch.no_grad():
                    next_value = actor_critic.get_value(
                        test_rollouts.obs[-1],
                        test_rollouts.recurrent_hidden_states[-1],
                        test_rollouts.masks[-1]).detach()
                test_rollouts.after_update()

                print(
                    f"Test Episode Total: {test_episode_total}, Mean Test rewards: {np.mean(test_episode_rewards)}, Test Episode Length: {np.mean(test_episode_length)}, Test Episode Success Rate: {np.mean(test_episode_success_rate)}"
                )
                test_total_num_steps = (j + 1) * args.num_steps
                # experiment is None when Comet logging is disabled
                if experiment is not None:
                    experiment.log_metric("Test Reward Mean",
                                          np.mean(test_episode_rewards),
                                          step=test_total_num_steps)
                    experiment.log_metric("Test Reward Min",
                                          np.min(test_episode_rewards),
                                          step=test_total_num_steps)
                    experiment.log_metric("Test Reward Max",
                                          np.max(test_episode_rewards),
                                          step=test_total_num_steps)
                    experiment.log_metric("Test Episode Length Mean",
                                          np.mean(test_episode_length),
                                          step=test_total_num_steps)
                    experiment.log_metric("Test Episode Length Min",
                                          np.min(test_episode_length),
                                          step=test_total_num_steps)
                    experiment.log_metric("Test Episode Length Max",
                                          np.max(test_episode_length),
                                          step=test_total_num_steps)
                    experiment.log_metric("# Test Trajectories (Total)", j)
                    experiment.log_metric("Test Episodic Success Rate",
                                          np.mean(test_episode_success_rate),
                                          step=test_total_num_steps)

        if (args.eval_interval is not None and len(episode_rewards) > 1
                and j % args.eval_interval == 0):
            ob_rms = utils.get_vec_normalize(envs).ob_rms
            evaluate(actor_critic, ob_rms, args.env_name, args.seed,
                     args.num_processes, eval_log_dir, device)
Example #14
def main(args, config=None, init_distributed=False):
    utils.import_user_module(args)

    experiment = None
    if config:
        experiment = ExistingExperiment(
            api_key=config["api_key"],
            previous_experiment=config["experiment_key"],
            auto_output_logging=None,
        )

    assert (
        args.max_tokens is not None or args.max_sentences is not None
    ), "Must specify batch size either with --max-tokens or --max-sentences"

    # Initialize CUDA and distributed training
    if torch.cuda.is_available() and not args.cpu:
        torch.cuda.set_device(args.device_id)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if init_distributed:
        args.distributed_rank = distributed_utils.distributed_init(args)

    if distributed_utils.is_master(args):
        checkpoint_utils.verify_checkpoint_directory(args.save_dir)

    print(args)
    if experiment:
        experiment.log_parameters(vars(args),
                                  prefix="Device {} :: ".format(
                                      args.device_id))

    # Setup task, e.g., translation, language modeling, etc.
    task = tasks.setup_task(args)

    # Load valid dataset (we load training data below, based on the latest checkpoint)
    for valid_sub_split in args.valid_subset.split(","):
        task.load_dataset(valid_sub_split, combine=False, epoch=0)

    # Build model and criterion
    model = task.build_model(args)
    criterion = task.build_criterion(args)
    print(model)
    print("| model {}, criterion {}".format(args.arch,
                                            criterion.__class__.__name__))
    print("| num. model params: {} (num. trained: {})".format(
        sum(p.numel() for p in model.parameters()),
        sum(p.numel() for p in model.parameters() if p.requires_grad),
    ))

    if experiment:
        experiment.log_parameters(
            {
                "criterion": criterion.__class__.__name__,
                "num. model params": sum(p.numel() for p in model.parameters()),
                "num. trained params": sum(
                    p.numel() for p in model.parameters() if p.requires_grad),
            },
            prefix="Device {} :: ".format(args.device_id),
        )

    # Build trainer
    trainer = Trainer(args, task, model, criterion)
    print("| training on {} GPUs".format(args.distributed_world_size))
    print("| max tokens per GPU = {} and max sentences per GPU = {}".format(
        args.max_tokens, args.max_sentences))

    # Load the latest checkpoint if one is available and restore the
    # corresponding train iterator
    extra_state, epoch_itr = checkpoint_utils.load_checkpoint(args, trainer)

    # Train until the learning rate gets too small
    max_epoch = args.max_epoch or math.inf
    max_update = args.max_update or math.inf
    lr = trainer.get_lr()
    train_meter = StopwatchMeter()
    train_meter.start()
    valid_subsets = args.valid_subset.split(",")
    while (lr > args.min_lr and epoch_itr.epoch < max_epoch
           and trainer.get_num_updates() < max_update):
        # train for one epoch
        train(args, trainer, task, epoch_itr, experiment)

        if (not args.disable_validation
                and epoch_itr.epoch % args.validate_interval == 0):
            valid_losses = validate(args, trainer, task, epoch_itr,
                                    valid_subsets, experiment)
        else:
            valid_losses = [None]

        # only use first validation loss to update the learning rate
        lr = trainer.lr_step(epoch_itr.epoch, valid_losses[0])

        # save checkpoint
        if epoch_itr.epoch % args.save_interval == 0:
            checkpoint_utils.save_checkpoint(args, trainer, epoch_itr,
                                             valid_losses[0])

        reload_dataset = ":" in getattr(args, "data", "")
        # sharded data: get train iterator for next epoch
        epoch_itr = trainer.get_train_iterator(epoch_itr.epoch,
                                               load_dataset=reload_dataset)
    train_meter.stop()
    print("| done training in {:.1f} seconds".format(train_meter.sum))

    if experiment:
        experiment.log_metrics(
            {
                "valid_loss": valid_losses[0],
                "lr": lr
            },
            prefix="Device {} ".format(args.device_id),
        )
Example #15
class CometLogger():
    def __init__(self, enabled, is_existing=False, prev_exp_key=None):
        """
        Handles logging of experiment to comet and also persistence to local file system.
        Supports resumption of stopped experiments.
        """
        disabled = not enabled

        if not is_existing:
            self.experiment = Experiment(api_key=COMET_API_KEY,
                                         workspace=COMET_WORKSPACE,
                                         project_name=PROJECT_NAME,
                                         disabled=disabled)
        else:
            if prev_exp_key is None:
                raise ValueError(
                    "Requested existing experiment, but no key provided")
            print("Continuing existing experiment with key: ", prev_exp_key)
            self.experiment = ExistingExperiment(
                api_key=COMET_API_KEY,
                workspace=COMET_WORKSPACE,
                project_name=PROJECT_NAME,
                disabled=disabled,
                previous_experiment=prev_exp_key)
        self.disabled = disabled

    def get_experiment_key(self):
        return self.experiment.get_key()[:9]

    def add_tag(self, tag):
        self.experiment.add_tag(tag)

    def log_metric(self, name, value, step=None):
        self.experiment.log_metric(name, value, step=step)

    def log_metrics(self, metrics_dict, prefix, step=None):
        self.experiment.log_metrics(metrics_dict, prefix=prefix, step=step)

    def log_params(self, params_dict):
        self.experiment.log_parameters(params_dict)

    def set_name(self, name_str):
        self.experiment.set_name(name_str)

    def log_dataset(self, dataset: SpeakerVerificationDataset):
        if self.disabled:
            return
        dataset_string = ""
        dataset_string += "<b>Speakers</b>: %s\n" % len(dataset.speakers)
        dataset_string += "\n" + dataset.get_logs()
        dataset_string = dataset_string.replace("\n", "<br>")
        # self.vis is a leftover from a visdom-based logger and is never
        # defined in this class; log the HTML summary to Comet instead.
        self.experiment.log_html("<b>Dataset</b><br>" + dataset_string)

    def log_implementation(self, params):
        if self.disabled:
            return
        implementation_string = ""
        for param, value in params.items():
            implementation_string += "<b>%s</b>: %s\n" % (param, value)
            implementation_string = implementation_string.replace("\n", "<br>")
        self.implementation_string = implementation_string
        self.implementation_win = self.vis.text(
            implementation_string, opts={"title": "Training implementation"})

    def draw_projections(self,
                         embeds,
                         utterances_per_speaker,
                         step,
                         out_fpath=None,
                         max_speakers=16):
        if self.disabled:
            return
        max_speakers = min(max_speakers, len(colormap))
        embeds = embeds[:max_speakers * utterances_per_speaker]

        n_speakers = len(embeds) // utterances_per_speaker
        ground_truth = np.repeat(np.arange(n_speakers), utterances_per_speaker)
        colors = [colormap[i] for i in ground_truth]

        reducer = umap.UMAP()
        projected = reducer.fit_transform(embeds)
        plt.scatter(projected[:, 0], projected[:, 1], c=colors)
        plt.gca().set_aspect("equal", "datalim")
        plt.title("UMAP projection (step %d)" % step)
        if out_fpath is not None:
            plt.savefig(out_fpath)
            self.experiment.log_image(out_fpath, step=step)
        plt.clf()
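A usage sketch for the logger above (COMET_API_KEY, COMET_WORKSPACE, and PROJECT_NAME are module-level constants assumed to be defined elsewhere; the name, tag, and metric values are hypothetical):

logger = CometLogger(enabled=True)
logger.set_name("speaker-verification-baseline")
logger.add_tag("baseline")
logger.log_metric("eer", 0.042, step=1000)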
Example #16
    def __init__(self, args=args):
        super().__init__()
        self.args = args
        # random_seed setting
        random_seed = args.randomseed
        np.random.seed(random_seed)
        torch.manual_seed(random_seed)
        if torch.cuda.device_count() > 1:
            torch.cuda.manual_seed_all(random_seed)
        else:
            torch.cuda.manual_seed(random_seed)
        torch.backends.cudnn.deterministic = True
        torch.backends.cudnn.benchmark = False

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.slomo = model.Slomo(self.args.data_h, self.args.data_w, self.device)
        self.slomo.to(self.device)
        if self.args.init_type != "":
            init_net(self.slomo, self.args.init_type)
            print(self.args.init_type + " initializing slomo done!")
        if self.args.train_continue:
            if not self.args.nocomet and self.args.cometid != "":
                self.comet_exp = ExistingExperiment(
                    previous_experiment=self.args.cometid
                )
            elif not self.args.nocomet and self.args.cometid == "":
                self.comet_exp = Experiment(
                    workspace=self.args.workspace, project_name=self.args.projectname
                )
            else:
                self.comet_exp = None
            self.ckpt_dict = torch.load(self.args.checkpoint)
            self.slomo.load_state_dict(self.ckpt_dict["model_state_dict"])
            self.args.init_learning_rate = self.ckpt_dict["learningRate"]
            self.optimizer = optim.Adam(
                self.slomo.parameters(), lr=self.args.init_learning_rate
            )
            self.optimizer.load_state_dict(self.ckpt_dict["opt_state_dict"])
            print("Pretrained model loaded!")
        else:
            # start logging info in comet-ml
            if not self.args.nocomet:
                self.comet_exp = Experiment(
                    workspace=self.args.workspace, project_name=self.args.projectname
                )
                # self.comet_exp.log_parameters(flatten_opts(self.args))
            else:
                self.comet_exp = None
            self.ckpt_dict = {
                "trainLoss": {},
                "valLoss": {},
                "valPSNR": {},
                "valSSIM": {},
                "learningRate": {},
                "epoch": -1,
                "detail": "End to end Super SloMo.",
                "trainBatchSz": self.args.train_batch_size,
                "validationBatchSz": self.args.validation_batch_size,
            }
            self.optimizer = optim.Adam(
                self.slomo.parameters(), lr=self.args.init_learning_rate
            )
        self.scheduler = optim.lr_scheduler.MultiStepLR(
            self.optimizer, milestones=self.args.milestones, gamma=0.1
        )
        # Channel wise mean calculated on adobe240-fps training dataset
        mean = [0.5, 0.5, 0.5]
        std = [1, 1, 1]
        self.normalize = transforms.Normalize(mean=mean, std=std)
        self.transform = transforms.Compose([transforms.ToTensor(), self.normalize])

        trainset = dataloader.SuperSloMo(
            root=self.args.dataset_root + "/train", transform=self.transform, train=True
        )
        self.trainloader = torch.utils.data.DataLoader(
            trainset,
            batch_size=self.args.train_batch_size,
            num_workers=self.args.num_workers,
            shuffle=True,
        )

        validationset = dataloader.SuperSloMo(
            root=self.args.dataset_root + "/validation",
            transform=self.transform,
            # randomCropSize=(128, 128),
            train=False,
        )
        self.validationloader = torch.utils.data.DataLoader(
            validationset,
            batch_size=self.args.validation_batch_size,
            num_workers=self.args.num_workers,
            shuffle=False,
        )
        ### loss
        self.supervisedloss = supervisedLoss()
        self.best = {
            "valLoss": 99999999,
            "valPSNR": -1,
            "valSSIM": -1,
        }
        self.checkpoint_counter = int(
            (self.ckpt_dict["epoch"] + 1) / self.args.checkpoint_epoch
        )
Example #17
def main(args):
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)

    print('Loading data')
    data = np.load(args.boards_file, allow_pickle=True)
    idxs = data['idxs']
    labels = data['values'] 
    mask = labels != None
    idxs = idxs[mask]
    labels = labels[mask]
    n = len(idxs)

    if args.shuffle:
        perm = np.random.permutation(n)
        idxs = idxs[perm]
        labels = labels[perm]

    if args.experiment is None:
        experiment = Experiment(project_name="chess-axia")
        experiment.log_parameters(vars(args))
    else:
        experiment = ExistingExperiment(previous_experiment=args.experiment)
    key = experiment.get_key()

    print(f'Number of Boards: {n}')

    if torch.cuda.is_available() and args.num_gpus > 0:
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')

    if args.num_train is None:
        args.num_train = n - args.num_test
    if args.num_train + args.num_test > n:
        raise ValueError('num-train and num-test sum to more than dataset size')
    train_idxs = idxs[:args.num_train]
    test_idxs = idxs[-args.num_test:]

    train_labels = labels[:args.num_train]  # match the train_idxs slice above
    test_labels = labels[-args.num_test:]
    #print(f'Win percentage: {sum(train_labels)/ len(train_labels):.1%}')
    print('Train size: ' + str(len(train_labels)))

    train_loader = DataLoader(BoardAndPieces(train_idxs, train_labels),
                              batch_size=args.batch_size, collate_fn=collate_fn,
                              shuffle=True)
    test_loader = DataLoader(BoardAndPieces(test_idxs, test_labels),
                             batch_size=args.batch_size, collate_fn=collate_fn)

    ae = AutoEncoder().to(device)
    ae_file = append_to_modelname(args.ae_model, args.ae_iter)
    ae.load_state_dict(torch.load(ae_file))

    model = BoardValuator(ae).to(device)
    loss_fn = model.loss_fn
    model = DataParallel(model)
    if args.model_loadname:
        model.load_state_dict(torch.load(args.model_loadname))

    if args.ae_freeze:
        print('Freezing AE model')
        for param in ae.parameters():
            param.requires_grad = False

    if torch.cuda.device_count() > 1 and args.num_gpus > 1:
        model = torch.nn.DataParallel(model)

    optimizer = optim.Adam(model.parameters(), lr=args.lr)

    cum_loss = count = 0  # running-loss accumulators used in the log block below
    total_iters = args.init_iter

    for epoch in range(args.init_epoch, args.epochs):
        print(f'Running epoch {epoch} / {args.epochs}\n')
        #for batch_idx, (input, mask, label) in tqdm(enumerate(train_loader),
        #                             total=len(train_loader)):
        for batch_idx, (input, mask, label) in enumerate(train_loader):

            model.train()

            input = to(input, device)
            mask = to(mask, device)
            label = to(label, device)

            optimizer.zero_grad()
            output = model(input, mask)
            loss = loss_fn(output, label)
            loss.backward()
            optimizer.step()

            cum_loss += loss.item()
            # cum_acc += acc.item()
            count += 1

            if total_iters % args.log_interval == 0:
                tqdm.write(f'Epoch: {epoch}\t Iter: {total_iters:>6}\t Loss: {loss.item():.5f}')
                # experiment.log_metric('accuracy', cum_acc / count,
                #                       step=total_iters)
                experiment.log_metric('loss', cum_loss / count,
                                      step=total_iters)
                experiment.log_metric('loss_', cum_loss / count,
                                      step=total_iters)
                cum_loss = count = 0  # reset the running average after logging

            if total_iters % args.save_interval == 0:
                path = get_modelpath(args.model_dirname, key,
                                     args.model_savename, iter=total_iters,
                                     epoch=epoch)
                dirname = os.path.dirname(path)
                if not os.path.exists(dirname):
                    os.makedirs(dirname)
                torch.save(model.state_dict(), path)

            if total_iters % args.eval_interval == 0 and total_iters != 0:
                loss = eval_loss(model, test_loader, device, loss_fn)
                tqdm.write(f'\tTEST: Loss: {loss:.5f}')
                #experiment.log_metric('test accuracy', acc, step=total_iters,
                #                      epoch=epoch)
                experiment.log_metric('test loss', loss, step=total_iters,
                                      epoch=epoch)
            total_iters += 1
Example #18
            name=f"{str(epoch)}_#{logidx}",
        )


if __name__ == "__main__":

    input_dim = args.data_h * args.data_w
    batch_size = args.batchsize
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    encoder = Encoder(input_dim, 256, 256)
    decoder = Decoder(args.ld, 256, input_dim)
    vae = VAE(encoder, decoder)
    if args.train_continue:
        if not args.nocomet:
            comet_exp = ExistingExperiment(previous_experiment=args.cometid)
        else:
            comet_exp = None
        dict1 = torch.load(args.checkpoint)
        vae.load_state_dict(dict1["state_dict"])
        checkpoint_counter = dict1["checkpoint_counter"]
        optimizer = optim.Adam(vae.parameters(), lr=dict1["learningRate"])
    else:
        # start logging info in comet-ml
        if not args.nocomet:
            comet_exp = Experiment(workspace=args.workspace,
                                   project_name=args.projectname)
            # comet_exp.log_parameters(flatten_opts(args))
        else:
            comet_exp = None
        dict1 = {
Example #19
def experiment(variant, comet_exp_key=None):
    comet_logger = None
    if comet_exp_key is not None:
        # from rllab.misc.comet_logger import CometContinuedLogger, CometLogger
        # from comet_ml import Experiment, ExistingExperiment
        # comet_log = CometContinuedLogger(api_key="KWwx7zh6I2uw6oQMkpEo3smu0", previous_experiment_key=variant['comet_exp_key'])
        comet_logger = ExistingExperiment(
            api_key="KWwx7zh6I2uw6oQMkpEo3smu0",
            previous_experiment=variant['comet_exp_key'])
        # comet_log = CometLogger(api_key="KWwx7zh6I2uw6oQMkpEo3smu0",
        #                     project_name="ml4l3", workspace="glenb")
        comet_logger.set_name("test seq train")
        # comet_log = comet_exp_key
        print("RL!: ", comet_logger)
    print("%%%%%%%%%%%%%%%%%", comet_logger)
    seed = variant['seed']
    log_dir = variant['log_dir']
    n_parallel = variant['n_parallel']

    setup(seed, n_parallel, log_dir)

    init_file = variant['init_file']
    taskIndex = variant['taskIndex']
    n_itr = variant['n_itr']
    default_step = variant['default_step']
    policyType = variant['policyType']
    envType = variant['envType']

    tasksFile = path_to_multiworld + '/multiworld/envs/goals/' + variant[
        'tasksFile'] + '.pkl'
    tasks = pickle.load(open(tasksFile, 'rb'))

    max_path_length = variant['max_path_length']

    use_images = 'conv' in policyType
    print("$$$$$$$$$$$$$$$ RL-TASK: ", str(tasks[taskIndex]),
          " $$$$$$$$$$$$$$$")
    if 'MultiDomain' in envType:
        baseEnv = Sawyer_MultiDomainEnv(tasks=tasks,
                                        image=use_images,
                                        mpl=max_path_length)

    elif 'Push' in envType:
        baseEnv = SawyerPushEnv(tasks=tasks,
                                image=use_images,
                                mpl=max_path_length)

    elif 'PickPlace' in envType:
        baseEnv = SawyerPickPlaceEnv(tasks=tasks,
                                     image=use_images,
                                     mpl=max_path_length)

    elif 'Door' in envType:
        baseEnv = SawyerDoorOpenEnv(tasks=tasks,
                                    image=use_images,
                                    mpl=max_path_length)

    elif 'Ant' in envType:
        env = TfEnv(normalize(AntEnvRandGoalRing()))

    elif 'Coffee' in envType:
        baseEnv = SawyerCoffeeEnv(mpl=max_path_length)

    else:
        raise AssertionError('Unknown envType: {}'.format(envType))

    if envType in ['Push', 'PickPlace', 'Door']:
        if use_images:
            obs_keys = ['img_observation']
        else:
            obs_keys = ['state_observation']
        env = TfEnv(
            NormalizedBoxEnv(
                FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=obs_keys),
                            reset_mode='idx')))

    baseline = ZeroBaseline(env_spec=env.spec)
    # baseline = LinearFeatureBaseline(env_spec = env.spec)
    batch_size = variant['batch_size']

    if policyType == 'fullAda_Bias':

        baseline = LinearFeatureBaseline(env_spec=env.spec)
        algo = vpg_fullADA(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            # noise_opt = True,
            default_step=default_step,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),

            # reset_arg=np.asscalar(taskIndex),
            reset_arg=taskIndex,
            log_dir=log_dir,
            comet_logger=comet_logger,
            outer_iteration=variant['outer_iteration'])

    elif policyType == 'biasAda_Bias':

        algo = vpg_biasADA(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            # noise_opt = True,
            default_step=default_step,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            # reset_arg=np.asscalar(taskIndex),
            reset_arg=taskIndex,
            log_dir=log_dir)

    elif policyType == 'basic':

        algo = vpg_basic(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,
            max_path_length=max_path_length,
            n_itr=n_itr,
            # step_size=10.0,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            reset_arg=taskIndex,
            optimizer=None,
            optimizer_args={
                'init_learning_rate': default_step,
                'tf_optimizer_args': {
                    'learning_rate': 0.5 * default_step
                },
                'tf_optimizer_cls': tf.train.GradientDescentOptimizer
            },
            log_dir=log_dir
            # extra_input="onehot_exploration", # added by RK 6/19
            # extra_input_dim=5, # added by RK 6/19
        )

    elif 'conv' in policyType:

        algo = vpg_conv(
            env=env,
            policy=None,
            load_policy=init_file,
            baseline=baseline,
            batch_size=batch_size,  # 2x
            max_path_length=max_path_length,
            n_itr=n_itr,
            sampler_cls=VectorizedSampler,  # added by RK 6/19
            sampler_args=dict(n_envs=1),
            # noise_opt = True,
            default_step=default_step,
            # reset_arg=np.asscalar(taskIndex),
            reset_arg=taskIndex,
            log_dir=log_dir)

    else:
        raise AssertionError(
            'policyType must be fullAda_Bias, biasAda_Bias, basic, or conv')

    algo.train()
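
For reference, a minimal sketch of how this entry point might be invoked. The variant keys mirror the reads above; every concrete value is an illustrative assumption, not a setting from the original experiments.

# Hypothetical invocation of experiment(); all values below are assumptions.
variant = {
    'seed': 1,
    'log_dir': '/tmp/seq_train',
    'n_parallel': 1,
    'init_file': 'init_policy.pkl',
    'taskIndex': 0,
    'n_itr': 100,
    'default_step': 0.01,
    'policyType': 'fullAda_Bias',
    'envType': 'Push',
    'tasksFile': 'push_tasks',
    'max_path_length': 150,
    'batch_size': 20,
    'outer_iteration': 0,
    'comet_exp_key': None,  # set to an existing Comet key to resume logging
}
experiment(variant, comet_exp_key=None)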
Example No. 20
    def __init__(self, args=args):  # binds the module-level args at definition time
        super().__init__()

        self.device = torch.device(
            "cuda:0" if torch.cuda.is_available() else "cpu")
        dtype = torch.float
        self.args = args
        # TODO make all configurable
        self.num_epoch = args.epochs
        self.batch_size = args.train_batch_size

        self.input_time_window = 4
        self.output_time_horizon = 1
        self.temporal_stride = 1
        self.temporal_frames = 1
        self.time_steps = (self.input_time_window - self.temporal_frames +
                           1) // self.temporal_stride

        # Initiate the network
        # C×T×H×W
        input_shape = (1, self.temporal_frames, 128, 128)
        output_shape = (1, self.output_time_horizon, 128, 128)

        self.tau = 1
        hidden_size = 64
        kernel = (1, 5, 5)
        lstm_layers = 4

        self.encoder = E3DLSTM(input_shape, hidden_size, lstm_layers, kernel,
                               self.tau).type(dtype)
        self.decoder = nn.Conv3d(hidden_size * self.time_steps,
                                 output_shape[0],
                                 kernel,
                                 padding=(0, 2, 2)).type(dtype)

        if self.args.train_continue:
            if not self.args.nocomet and self.args.cometid != "":
                self.comet_exp = ExistingExperiment(
                    previous_experiment=self.args.cometid)
            elif not self.args.nocomet and self.args.cometid == "":
                self.comet_exp = Experiment(workspace=self.args.workspace,
                                            project_name=self.args.projectname)
            else:
                self.comet_exp = None
            self.ckpt_dict = torch.load(self.args.checkpoint)
            self.load_state_dict(self.ckpt_dict["state_dict"])
            self.to(self.device)
            params = self.parameters(recurse=True)
            self.optimizer = torch.optim.Adam(params,
                                              lr=self.args.init_learning_rate,
                                              weight_decay=0)
            self.optimizer.load_state_dict(self.ckpt_dict["opt_state_dict"])
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer, milestones=self.args.milestones, gamma=0.1)
        else:
            # start logging info in comet-ml
            if not self.args.nocomet:
                self.comet_exp = Experiment(workspace=self.args.workspace,
                                            project_name=self.args.projectname)
                # self.comet_exp.log_parameters(flatten_opts(self.args))
            else:
                self.comet_exp = None
            self.ckpt_dict = {
                "trainLoss": {},
                "valLoss": {},
                "valPSNR": {},
                "valSSIM": {},
                "epoch": -1,
                "detail": "End to end E3D",
                "trainBatchSz": self.args.train_batch_size,
            }
            self.to(self.device)
            params = self.parameters(recurse=True)
            self.optimizer = torch.optim.Adam(params,
                                              lr=self.args.init_learning_rate,
                                              weight_decay=0)
            self.scheduler = torch.optim.lr_scheduler.MultiStepLR(
                self.optimizer, milestones=self.args.milestones, gamma=0.1)

        # Note: the optimizer and MultiStepLR scheduler are configured in both
        # branches above; weight_decay=0 disables Adam's L2 regularization.

        self.apply(weights_init())
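
    # The resume branch above expects the checkpoint to carry "state_dict" and
    # "opt_state_dict" entries. A minimal companion saver consistent with that
    # layout might look like this (hypothetical; not part of the original):
    def save_checkpoint(self, epoch, path):
        self.ckpt_dict["epoch"] = epoch
        self.ckpt_dict["state_dict"] = self.state_dict()
        self.ckpt_dict["opt_state_dict"] = self.optimizer.state_dict()
        torch.save(self.ckpt_dict, path)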
Example No. 21
def main(args):

    print("Loading config file: ", args.config)
    params = utils.load_config_file(args.config)
    params["test_dataset_paths"] = utils.format_dataset_path(
        params["test_dataset_paths"])

    if args.existing_experiment:
        experiment = ExistingExperiment(
            api_key="jBFVYFo9VUsy0kb0lioKXfTmM",
            previous_experiment=args.existing_experiment)
    else:
        experiment = Experiment(api_key="jBFVYFo9VUsy0kb0lioKXfTmM",
                                project_name="fastdepth")

    # Data loading code
    print("Creating data loaders...")
    if args.nyu:
        from dataloaders.nyu import NYUDataset
        val_dataset = NYUDataset(params["test_dataset_paths"], split='val')
    else:
        val_dataset = Datasets.FastDepthDataset(params["test_dataset_paths"],
                                                split='val',
                                                depth_min=params["depth_min"],
                                                depth_max=params["depth_max"],
                                                input_shape_model=(224, 224))

    # set batch size to be 1 for validation
    val_loader = torch.utils.data.DataLoader(val_dataset,
                                             batch_size=1,
                                             shuffle=True,
                                             num_workers=params["num_workers"],
                                             pin_memory=True)

    # Set GPU
    params["device"] = torch.device(
        "cuda:{}".format(params["device"])
        if params["device"] >= 0 and torch.cuda.is_available() else "cpu")
    print("Using device", params["device"])

    print("Loading model '{}'".format(args.model))
    if not args.nyu:
        model, _ = utils.load_model(params, args.model, params["device"])
    else:
        # Maintain compatibility for fastdepth NYU model format
        state_dict = torch.load(args.model, map_location=params["device"])
        model = models.MobileNetSkipAdd(output_size=(224, 224),
                                        pretrained=True)
        model.load_state_dict(state_dict)
        params["start_epoch"] = 0

    model.to(params["device"])

    # Create output directory
    output_directory = os.path.join(os.path.dirname(args.model), "images")
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)
    params["experiment_dir"] = output_directory
    print("Saving results to " + output_directory)

    evaluate(params, val_loader, model, experiment)
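
The snippet reads args.config, args.model, args.nyu and args.existing_experiment but does not show how they are defined; a plausible CLI wiring, with illustrative flag names and defaults, might be:

# Hypothetical CLI definition inferred from the args.* reads above.
import argparse

parser = argparse.ArgumentParser(description='Evaluate a FastDepth model')
parser.add_argument('--config', required=True, help='path to YAML config file')
parser.add_argument('--model', required=True, help='path to model checkpoint')
parser.add_argument('--nyu', action='store_true',
                    help='load the NYU-format MobileNetSkipAdd checkpoint')
parser.add_argument('--existing-experiment', dest='existing_experiment',
                    default=None, help='Comet key of an experiment to resume')

if __name__ == '__main__':
    main(parser.parse_args())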
Example No. 22
from comet_ml import ExistingExperiment
import matplotlib.pyplot as plt
import torch
from data import create_dataloader
from model import TransformerClassification

experiment = ExistingExperiment(
    previous_experiment='b8d5b06e99484f8a93dd0d84f8a36f3e')


def main():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # load data
    _, _, test_dl, TEXT = create_dataloader()

    # load model
    net = TransformerClassification(TEXT.vocab.vectors,
                                    d_model=300,
                                    max_seq_len=256,
                                    output_dim=2)
    net.load_state_dict(torch.load('checkpoints/model.pt'))
    net.to(device)

    epoch_corrects = 0
    for batch in test_dl:
        inputs = batch.Text[0].to(device)
        labels = batch.Label.to(device)

        with torch.set_grad_enabled(False):
            input_pad = 1
Example No. 23
if args.init_type != "":
    init_net(flowComp, args.init_type)
    print(args.init_type + " initializing flowComp done")
ArbTimeFlowIntrp = model.UNet(20, 5)
ArbTimeFlowIntrp.to(device)
if args.init_type != "":
    init_net(ArbTimeFlowIntrp, args.init_type)
    print(args.init_type + " initializing ArbTimeFlowIntrp done")


### Initialization


if args.train_continue:
    if not args.nocomet and args.cometid != "":
        comet_exp = ExistingExperiment(previous_experiment=args.cometid)
    elif not args.nocomet and args.cometid == "":
        comet_exp = Experiment(workspace=args.workspace, project_name=args.projectname)
    else:
        comet_exp = None
    dict1 = torch.load(args.checkpoint)
    ArbTimeFlowIntrp.load_state_dict(dict1["state_dictAT"])
    flowComp.load_state_dict(dict1["state_dictFC"])
    print("Pretrained model loaded!")
else:
    # start logging info in comet-ml
    if not args.nocomet:
        comet_exp = Experiment(workspace=args.workspace, project_name=args.projectname)
        # comet_exp.log_parameters(flatten_opts(args))
    else:
        comet_exp = None
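
For the ExistingExperiment branch above to ever fire, the key of a fresh run has to be recorded somewhere. One possible approach, sketched here with a hypothetical file name:

# Sketch: persist the new experiment's key so a later run can pass it
# back as --cometid. The file name is an assumption.
if comet_exp is not None:
    with open("comet_key.txt", "w") as f:
        f.write(comet_exp.get_key())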
Example No. 24
def main(opts):
    """
    Opts precedence:
        1. Load file specified in args.default (or shared/trainer/defaults.yaml
           if none is provided)
        2. Update with file specified in args.config (or no update if none is provided)
        3. Update with parsed command-line arguments

        e.g.
        `python train.py args.config=config/large-lr.yaml data.loaders.batch_size=10`
        loads defaults, overrides with values in large-lr.yaml and sets batch_size to 10
    """

    # -----------------------------
    # -----  Parse arguments  -----
    # -----------------------------

    hydra_opts = Dict(OmegaConf.to_container(opts))
    args = hydra_opts.pop("args", None)
    auto_resumed = {}

    config_path = args.config

    if hydra_opts.train.resume:
        out_ = str(env_to_path(hydra_opts.output_path))
        config_path = Path(out_) / "opts.yaml"
        if not config_path.exists():
            config_path = None
            print("WARNING: could not reuse the opts in {}".format(out_))

    default = args.default or Path(
        __file__).parent / "shared/trainer/defaults.yaml"

    # -----------------------
    # -----  Load opts  -----
    # -----------------------

    opts = load_opts(config_path, default=default, commandline_opts=hydra_opts)
    if args.resume:
        opts.train.resume = True

    opts.jobID = os.environ.get("SLURM_JOBID")
    opts.slurm_partition = os.environ.get("SLURM_JOB_PARTITION")
    opts.output_path = str(env_to_path(opts.output_path))
    print("Config output_path:", opts.output_path)

    exp = comet_previous_id = None

    # -------------------------------
    # -----  Check output_path  -----
    # -------------------------------

    # Auto-continue if same slurm job ID (=job was requeued)
    if not opts.train.resume and opts.train.auto_resume:
        print("\n\nTrying to auto-resume...")
        existing_path = find_existing_training(opts)
        if existing_path is not None and existing_path.exists():
            auto_resumed["original output_path"] = str(opts.output_path)
            auto_resumed["existing_path"] = str(existing_path)
            opts.train.resume = True
            opts.output_path = str(existing_path)

    # Still not resuming: creating new output path
    if not opts.train.resume:
        opts.output_path = str(get_increased_path(opts.output_path))
        Path(opts.output_path).mkdir(parents=True, exist_ok=True)

    # Copy the opts's sbatch_file to output_path
    copy_run_files(opts)
    # store git hash
    opts.git_hash = get_git_revision_hash()
    opts.git_branch = get_git_branch()

    if not args.no_comet:
        # ----------------------------------
        # -----  Set Comet Experiment  -----
        # ----------------------------------

        if opts.train.resume:
            # Is resuming: get existing comet exp id
            assert Path(
                opts.output_path).exists(), "Output_path does not exist"

            comet_previous_id = get_existing_comet_id(opts.output_path)
            # Continue existing experiment
            if comet_previous_id is None:
                print("WARNING could not retreive previous comet id")
                print(f"from {opts.output_path}")
            else:
                print("Continuing previous experiment", comet_previous_id)
                auto_resumed["continuing exp id"] = comet_previous_id
                exp = ExistingExperiment(previous_experiment=comet_previous_id,
                                         **comet_kwargs)
                print("Comet Experiment resumed")

        if exp is None:
            # Create new experiment
            print("Starting new experiment")
            exp = Experiment(project_name="climategan", **comet_kwargs)
            exp.log_asset_folder(
                str(Path(__file__).parent / "climategan"),
                recursive=True,
                log_file_name=True,
            )
            exp.log_asset(str(Path(__file__)))

        # Log note
        if args.note:
            exp.log_parameter("note", args.note)

        # Merge and log tags
        if args.comet_tags or opts.comet.tags:
            tags = set([f"branch:{opts.git_branch}"])
            if args.comet_tags:
                tags.update(args.comet_tags)
            if opts.comet.tags:
                tags.update(opts.comet.tags)
            opts.comet.tags = list(tags)
            print("Logging to comet.ml with tags", opts.comet.tags)
            exp.add_tags(opts.comet.tags)

        # Log all opts
        exp.log_parameters(flatten_opts(opts))
        if auto_resumed:
            exp.log_text("\n".join(f"{k:20}: {v}"
                                   for k, v in auto_resumed.items()))

        # allow some time for comet to get its url
        sleep(1)

        # Save comet exp url
        url_path = get_increased_path(Path(opts.output_path) / "comet_url.txt")
        with open(url_path, "w") as f:
            f.write(exp.url)

        # Save config file
        opts_path = get_increased_path(Path(opts.output_path) / "opts.yaml")
        with (opts_path).open("w") as f:
            yaml.safe_dump(opts.to_dict(), f)

    pprint("Running model in", opts.output_path)

    # -------------------
    # -----  Train  -----
    # -------------------

    trainer = Trainer(opts, comet_exp=exp, verbose=1)
    trainer.logger.time.start_time = time()
    trainer.setup()
    trainer.train()

    # -----------------------------
    # -----  End of training  -----
    # -----------------------------

    pprint("Done training")
    kill_job(opts.jobID)
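
The resume branch depends on get_existing_comet_id recovering the key from the comet_url.txt file written above. Since a Comet experiment URL ends with the experiment key, a plausible implementation is the following sketch; the project's real helper may differ:

# Sketch of the helper the resume branch relies on (assumed behavior).
from pathlib import Path

def get_existing_comet_id(output_path):
    """Return the experiment key parsed from the newest comet_url file, if any."""
    url_files = sorted(Path(output_path).glob("comet_url*.txt"))
    if not url_files:
        return None
    url = url_files[-1].read_text().strip()
    return url.rstrip("/").split("/")[-1] or None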
Example No. 25
def experiment(variant, comet_exp_key=None):
    if comet_exp_key is not None:
        from comet_ml import ExistingExperiment
        # Resume the Comet experiment whose key is stored in the variant.
        comet_log = ExistingExperiment(
            api_key="KWwx7zh6I2uw6oQMkpEo3smu0",
            previous_experiment=variant['comet_exp_key'])
        comet_log.set_name("test seq train")
        print(comet_log)
    else:
        comet_log = None
    print("loading libraries")
    from sandbox.rocky.tf.algos.maml_il import MAMLIL

    from rllab.baselines.linear_feature_baseline import LinearFeatureBaseline
    from rllab.baselines.gaussian_mlp_baseline import GaussianMLPBaseline
    from rllab.baselines.maml_gaussian_mlp_baseline import MAMLGaussianMLPBaseline
    from rllab.baselines.zero_baseline import ZeroBaseline
    from rllab.envs.normalized_env import normalize
    from rllab.misc.instrument import stub, run_experiment_lite
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy import MAMLGaussianMLPPolicy as basic_policy
    # from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_adaptivestep import MAMLGaussianMLPPolicy as fullAda_basic_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_adaptivestep_ppo import \
        MAMLGaussianMLPPolicy as PPO_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_adaptivestep_biastransform import \
        MAMLGaussianMLPPolicy as fullAda_Bias_policy
    from sandbox.rocky.tf.policies.maml_minimal_gauss_mlp_policy_biasonlyadaptivestep_biastransform import \
        MAMLGaussianMLPPolicy as biasAda_Bias_policy
    from sandbox.rocky.tf.policies.maml_minimal_conv_gauss_mlp_policy import MAMLGaussianMLPPolicy as conv_policy
    
    from sandbox.rocky.tf.optimizers.quad_dist_expert_optimizer import QuadDistExpertOptimizer
    from sandbox.rocky.tf.optimizers.first_order_optimizer import FirstOrderOptimizer
    from sandbox.rocky.tf.envs.base import TfEnv
    import sandbox.rocky.tf.core.layers as L
    
    from rllab.envs.mujoco.ant_env_rand_goal_ring import AntEnvRandGoalRing
    from multiworld.envs.mujoco.sawyer_xyz.push.sawyer_push import SawyerPushEnv
    from multiworld.envs.mujoco.sawyer_xyz.pickPlace.sawyer_pick_and_place import SawyerPickPlaceEnv
    from multiworld.envs.mujoco.sawyer_xyz.door.sawyer_door_open import SawyerDoorOpenEnv
    from multiworld.core.flat_goal_env import FlatGoalEnv
    from multiworld.core.finn_maml_env import FinnMamlEnv
    from multiworld.core.wrapper_env import NormalizedBoxEnv
    
    import tensorflow as tf
    import time
    from rllab.envs.gym_env import GymEnv
    
    from maml_examples.maml_experiment_vars import MOD_FUNC
    import numpy as np
    import random as rd
    import pickle
    
    print ("Done loading libraries")
    
    seed = variant['seed']
    n_parallel = 1
    log_dir = variant['log_dir']

    setup(seed, n_parallel, log_dir)
    fast_batch_size = variant['fbs']
    meta_batch_size = variant['mbs']
    adam_steps = variant['adam_steps']
    max_path_length = variant['max_path_length']
    dagger = variant['dagger']
    expert_policy_loc = variant['expert_policy_loc']
    ldim = variant['ldim']
    init_flr = variant['init_flr']
    policyType = variant['policyType']
    use_maesn = variant['use_maesn']
    EXPERT_TRAJ_LOCATION = variant['expertDataLoc']
    envType = variant['envType']
    tasksFile = path_to_multiworld + 'multiworld/envs/goals/' + variant['tasksFile'] + '.pkl'
    all_tasks = pickle.load(open(tasksFile, 'rb'))
    assert meta_batch_size <= len(all_tasks), \
        "meta_batch_size (%d) exceeds the number of tasks (%d)" % (meta_batch_size, len(all_tasks))
    tasks = all_tasks[:meta_batch_size]
    print("meta_tasks:", tasks)

    use_images = 'conv' in policyType

    if 'Push' == envType:
        baseEnv = SawyerPushEnv(tasks=tasks, image=use_images, mpl=max_path_length)

    elif envType == 'sparsePush':
        baseEnv = SawyerPushEnv(tasks=tasks, image=use_images, mpl=max_path_length, rewMode='l2Sparse')


    elif 'PickPlace' in envType:
        baseEnv = SawyerPickPlaceEnv(tasks=tasks, image=use_images, mpl=max_path_length)

    elif 'Door' in envType:
        baseEnv = SawyerDoorOpenEnv(tasks=tasks, image=use_images, mpl=max_path_length)

    elif 'Ant' in envType:
        env = TfEnv(normalize(AntEnvRandGoalRing()))

    elif 'claw' in envType:
        env = TfEnv(DClawScrewRandGoal())

    else:
        raise AssertionError('Unknown envType: ' + envType)

    if envType in ['Push', 'PickPlace', 'Door']:
        if use_images:
            obs_keys = ['img_observation']
        else:
            obs_keys = ['state_observation']
        env = TfEnv(NormalizedBoxEnv(FinnMamlEnv(FlatGoalEnv(baseEnv, obs_keys=obs_keys), reset_mode='idx')))

    algoClass = MAMLIL
    baseline = LinearFeatureBaseline(env_spec=env.spec)

    load_policy = variant['load_policy']

    if load_policy is not None:
        policy = None

    elif 'fullAda_PPO' in policyType:

        policy = PPO_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )
        
    elif 'fullAda_Bias' in policyType:

        policy = fullAda_Bias_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )

    elif 'biasAda_Bias' in policyType:

        policy = biasAda_Bias_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            init_flr_full=init_flr,
            latent_dim=ldim
        )

    elif 'basic' in policyType:
        policy = basic_policy(
            name="policy",
            env_spec=env.spec,
            grad_step_size=init_flr,
            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        )


    elif 'conv' in policyType:

        baseline = ZeroBaseline(env_spec=env.spec)

        policy = conv_policy(
            name="policy",
            latent_dim=ldim,
            policyType=policyType,
            env_spec=env.spec,
            init_flr=init_flr,

            hidden_nonlinearity=tf.nn.relu,
            hidden_sizes=(100, 100),
            extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        )

    print("|||||||||||||||||||||||||||||||||||||||||||||||", variant['n_itr'])
    
    beta_steps = 1 ;
    meta_step_size = 0.01 ; num_grad_updates = 1
    pre_std_modifier = 1.0 ; post_std_modifier = 0.00001 
    limit_demos_num = None 

    # use_corr_term, test_on_training_goals, l2loss_std_mult, extra_input and
    # extra_input_dim are assumed to be defined at module level.
    algo = algoClass(
        env=env,
        policy=policy,
        load_policy=load_policy,
        baseline=baseline,
        batch_size=fast_batch_size,  # number of trajs for alpha grad update
        max_path_length=max_path_length,
        meta_batch_size=meta_batch_size,  # number of tasks sampled for beta grad update
        num_grad_updates=num_grad_updates,  # number of alpha grad updates
        n_itr=variant['n_itr'],
        make_video=False,
        use_maml=True,
        use_pooled_goals=True,
        use_corr_term=use_corr_term,
        test_on_training_goals=test_on_training_goals,
        metalearn_baseline=False,
        limit_demos_num=limit_demos_num,
        test_goals_mult=1,
        step_size=meta_step_size,
        plot=False,
        beta_steps=beta_steps,
        adam_curve=None,
        adam_steps=adam_steps,
        pre_std_modifier=pre_std_modifier,
        l2loss_std_mult=l2loss_std_mult,
        importance_sampling_modifier=MOD_FUNC[''],
        post_std_modifier=post_std_modifier,
        expert_trajs_dir=EXPERT_TRAJ_LOCATION,
        expert_trajs_suffix='',
        seed=seed,
        extra_input=extra_input,
        extra_input_dim=(0 if extra_input == "" else extra_input_dim),
        plotDirPrefix=None,
        latent_dim=ldim,
        dagger=dagger,
        expert_policy_loc=expert_policy_loc,
        comet_logger=comet_log,
        outerIteration=variant['outer_Iteration'],
        use_ppo=True
    )

    algo.train()
Example No. 26
parser.add_argument('-span', default=.5, type=float)
parser.add_argument('-seed', default=1234, type=int)
parser.add_argument('-eig', action='store_true')
parser.add_argument('-ckpt', default='poison-filtnorm-weaker', type=str)
parser.add_argument('-gpu', default='0', type=str)
parser.add_argument('-svhn', action='store_true')
args = parser.parse_args()

# comet stuff
if not os.path.exists('comet_expt_key_surface.txt'):
  experiment = Experiment(api_key="vPCPPZrcrUBitgoQkvzxdsh9k", parse_args=False,
                          project_name='landscape', workspace="wronnyhuang")
  open('comet_expt_key_surface.txt', 'w+').write(experiment.get_key())
else:
  comet_key = open('comet_expt_key_surface.txt', 'r').read()
  experiment = ExistingExperiment(api_key="vPCPPZrcrUBitgoQkvzxdsh9k", previous_experiment=comet_key, parse_args=False)

# apply settings
np.random.seed(args.seed)
os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

# load data and model
cleanloader, _, _ = get_loader(join(home, 'datasets'), batchsize=2 * 64, fracdirty=.5, nogan=True, svhn=args.svhn)
evaluator = Evaluator(cleanloader)
evaluator.restore_weights_dropbox('ckpt/'+args.ckpt)

# plot along which direction
if args.eig:
  eigfile = join('pickle', args.ckpt)
  if exists(eigfile): dw1 = pickle.load(open(eigfile, 'rb')) # load from file if hessian eigvec already computed
  else: # compute otherwise
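
The key-file stanza near the top of this example (create a new experiment once, save its key, then resume from that key on later runs) recurs across several of these snippets. A reusable helper along those lines might look like this sketch; the helper name and key_file argument are assumptions:

# Generic create-or-resume sketch of the key-file pattern above;
# the helper name and key_file argument are assumptions.
import os
from comet_ml import Experiment, ExistingExperiment

def get_or_resume_experiment(key_file, **kwargs):
  if os.path.exists(key_file):
    previous_key = open(key_file).read().strip()
    return ExistingExperiment(previous_experiment=previous_key, **kwargs)
  experiment = Experiment(**kwargs)
  open(key_file, 'w+').write(experiment.get_key())
  return experiment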
Example No. 27
            pred_2d,
            pred_3d,
            keep_matching=True,
        )
        pck = np.mean(list(pcks.values()))
        auc = np.mean(list(aucs.values()))
        values.append(pck)
        values.append(auc)

        print(" %4.1f   %4.1f  " % (pck, auc), end="")
    print()
    exp.log_metrics({f"{prefix}-{k}": v for k, v in zip(keys, values)})


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "model_name",
        help="Name of the model (either 'normal' or 'universal')")
    parser.add_argument(
        "-r",
        "--pose-refine",
        action="store_true",
        help="Apply pose-refinement after TPN",
    )
    args = parser.parse_args()

    # Here the model name doubles as the Comet experiment key to resume.
    exp = ExistingExperiment(previous_experiment=args.model_name)

    main(args.model_name, args.pose_refine, exp)
Example No. 28
        optimizer.load_state_dict(checkpoint['optimizer'])
        scheduler.load_state_dict(checkpoint['scheduler'])
    else:
        raise Exception
else:
    start_epoch = 0
    best_acc = 0

experiment = None
if args.api_key:
    project_dir, experiment_name = split(dirname(realpath(__file__)))
    project_name = basename(project_dir)
    if args.resume:
        experiment = ExistingExperiment(
                api_key=args.api_key, 
                previous_experiment=checkpoint['experiment_key'],
                auto_param_logging=False,
                auto_metric_logging=False,
                parse_args=False)
    else:
        experiment = Experiment(
            api_key=args.api_key,
            project_name=project_name,
            auto_param_logging=False,
            auto_metric_logging=False,
            parse_args=False)
    experiment.log_other('experiment_name', experiment_name)
    experiment.log_parameters(vars(args))
    for k in hyperparameters:
        if type(hyperparameters[k]) == dict:
            experiment.log_parameters(hyperparameters[k], prefix=k)
        else:
Example No. 29
DO_INTENSITY_SHIFT = True
RANDOM_CROP = [128, 128, 128]
DO_MIXUP = False

ROT_DEGREES = 20
SCALE_FACTOR = 1.1
SIGMA = 10
MAX_INTENSITY_SHIFT = 0.1

if LOG_COMETML:
    if not "LOG_COMETML_EXISTING_EXPERIMENT" in locals():
        experiment = Experiment(api_key="", project_name="", workspace="")
    else:
        experiment = ExistingExperiment(
            api_key="",
            previous_experiment=LOG_COMETML_EXISTING_EXPERIMENT,
            project_name="",
            workspace="")
else:
    experiment = None

# network functions
if TRAIN_ORIGINAL_CLASSES:
    loss = bratsUtils.bratsDiceLossOriginal5
else:
    #loss = bratsUtils.bratsDiceLoss
    def loss(outputs, labels):
        return bratsUtils.bratsDiceLoss(outputs, labels, nonSquared=True)
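
# Note: the wrapper above exists only to pin nonSquared=True; an equivalent,
# more compact form (assuming the same bratsUtils import) would be:
#   from functools import partial
#   loss = partial(bratsUtils.bratsDiceLoss, nonSquared=True)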


class ResidualInner(nn.Module):
Example No. 30
from comet_ml import ExistingExperiment
import matplotlib.pyplot as plt
import torch
from model import Generator, Discriminator
from data import make_datapath_list, GAN_Img_Dataset, ImageTransform

experiment = ExistingExperiment(
    previous_experiment='e746c2c19f194d588fdfdbb7dc573602')


def main():
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    # load model
    G = Generator(z_dim=20, image_size=64)
    D = Discriminator(z_dim=20, image_size=64)
    G.load_state_dict(torch.load('checkpoints/G.pt'))
    D.load_state_dict(torch.load('checkpoints/D.pt'))
    G.to(device)
    D.to(device)

    batch_size = 8
    z_dim = 20
    fixed_z = torch.randn(batch_size, z_dim)
    fixed_z = fixed_z.view(fixed_z.size(0), fixed_z.size(1), 1, 1)

    # generate fake images
    fake_images, am1, am2 = G(fixed_z.to(device))

    # real images
    train_img_list = make_datapath_list()