Exemplo n.º 1
0
def save_training_meta(args):
    """Save training/model configuration and git provenance under ``args.output_dir``.

    Writes:
      - ``<output_dir>/log/hps.json``          : training hyper-parameters (``vars(args)``)
      - ``<output_dir>/log/model_config.json`` : model config loaded from ``args.model_config``
      - ``<output_dir>/log/git_info.json``     : git branch / SHA / dirty status, when git succeeds
      - ``<output_dir>/code.zip``              : zip snapshot of the codebase, as a fallback
                                                 when git info cannot be collected

    Args:
        args: an argparse.Namespace / EasyDict exposing at least
            ``output_dir`` and ``model_config`` attributes.
    """
    # Comment out, since rank is not saved to args. Safeguard save_training_meta already in training scripts.
    # if args.rank > 0:
    #    return

    # args is an EasyDict object, treat it the same as a normal dict
    os.makedirs(join(args.output_dir, 'log'), exist_ok=True)
    os.makedirs(join(args.output_dir, 'ckpt'), exist_ok=True)

    # training args
    save_args_path = join(args.output_dir, 'log', 'hps.json')
    save_json(vars(args), save_args_path, save_pretty=True)

    # model args
    model_config = load_json(args.model_config)
    save_model_config_path = join(args.output_dir, 'log', 'model_config.json')
    save_json(model_config, save_model_config_path, save_pretty=True)
    # git info
    try:
        LOGGER.info("Waiting on git info....")
        # check=True: without it a failing `git rev-parse` returns normally and
        # the CalledProcessError handler below could never trigger for these
        # two calls — we would silently record an empty branch/SHA.
        c = subprocess.run(["git", "rev-parse", "--abbrev-ref", "HEAD"],
                           timeout=10,
                           check=True,
                           stdout=subprocess.PIPE)
        git_branch_name = c.stdout.decode().strip()
        LOGGER.info("Git branch: %s", git_branch_name)
        c = subprocess.run(["git", "rev-parse", "HEAD"],
                           timeout=10,
                           check=True,
                           stdout=subprocess.PIPE)
        git_sha = c.stdout.decode().strip()
        LOGGER.info("Git SHA: %s", git_sha)
        # NOTE(review): `git status` runs in this file's directory while the
        # two rev-parse calls above run in the process CWD — confirm both
        # resolve to the same repository in every launch configuration.
        git_dir = abspath(dirname(__file__))
        git_status = subprocess.check_output(['git', 'status', '--short'],
                                             cwd=git_dir,
                                             universal_newlines=True).strip()
        with open(join(args.output_dir, 'log', 'git_info.json'),
                  'w') as writer:
            json.dump(
                {
                    'branch': git_branch_name,
                    'is_dirty': bool(git_status),
                    'status': git_status,
                    'sha': git_sha
                },
                writer,
                indent=4)
    except (subprocess.TimeoutExpired, subprocess.CalledProcessError,
            FileNotFoundError) as e:
        # FileNotFoundError: git executable not installed at all.
        LOGGER.exception(e)
        # logging's .warn() is a deprecated alias; .warning() is the
        # supported spelling.
        LOGGER.warning("Git info not found. Saving code into zip instead...")
        # save a copy of the codebase.
        # !!!Do not store heavy file in your codebase when using it.
        code_dir = dirname(dirname(realpath(__file__)))
        code_zip_filename = os.path.join(args.output_dir, "code.zip")
        LOGGER.info(f"Saving code from {code_dir} to {code_zip_filename}...")
        make_zipfile(code_dir,
                     code_zip_filename,
                     enclosing_dir="code",
                     exclude_dirs_substring="results",
                     exclude_dirs=["results", "debug_results", "__pycache__"],
                     exclude_extensions=[".pyc", ".ipynb", ".swap"])
        LOGGER.info("Saving code done.")
Exemplo n.º 2
0
    def parse(self):
        """Parse CLI arguments and derive all runtime options.

        Training mode: requires ``--exp_id``, creates a timestamped results
        directory and snapshots the code into ``<results_dir>/code.zip``.
        Test mode (``self`` is a ``TestOptions``): reloads the options saved
        at training time from ``opt.model_dir``, keeping only a few
        eval-specific flags from the current command line.

        Returns:
            The fully post-processed options namespace (also stored on
            ``self.opt``).

        Raises:
            ValueError: in training mode when ``--exp_id`` is missing.
        """
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.debug:
            # Debug runs are redirected to a sibling "debug_results" dir, and
            # hdf5 "core" driver / worker processes are disabled so crashes
            # are easier to trace.
            opt.results_root = os.path.sep.join(
                opt.results_root.split(os.path.sep)[:-1] + [
                    "debug_results",
                ])
            opt.no_core_driver = True
            opt.num_workers = 0
            opt.eval_query_bsz = 100

        if isinstance(self, TestOptions):
            # modify model_dir to absolute path
            opt.model_dir = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "results",
                opt.model_dir)
            saved_options = load_json(
                os.path.join(opt.model_dir, self.saved_option_filename))
            # Use saved options to overwrite all BaseOptions args, except the
            # eval-time flags below (set gives O(1) membership tests).
            eval_time_args = {
                "results_root", "num_workers", "nms_thd", "debug",
                "eval_split_name", "eval_path", "eval_query_bsz",
                "eval_context_bsz", "max_pred_l", "min_pred_l",
                "external_inference_vr_res_path"
            }
            for arg in saved_options:
                if arg not in eval_time_args:
                    setattr(opt, arg, saved_options[arg])
            # opt.no_core_driver = True
        else:
            if opt.exp_id is None:
                # message grammar fixed (was "required for at a training option")
                raise ValueError(
                    "--exp_id is required for a training option!")

            if opt.clip_length is None:
                opt.clip_length = ProposalConfigs[opt.dset_name]["clip_length"]
                print("Loaded clip_length {} from proposal config file".format(
                    opt.clip_length))
            opt.results_dir = os.path.join(
                opt.results_root, "-".join([
                    opt.dset_name, opt.ctx_mode, opt.exp_id,
                    time.strftime("%Y_%m_%d_%H_%M_%S")
                ]))
            mkdirp(opt.results_dir)
            # save a copy of current code
            code_dir = os.path.dirname(os.path.realpath(__file__))
            code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            make_zipfile(
                code_dir,
                code_zip_filename,
                enclosing_dir="code",
                exclude_dirs_substring="results",
                exclude_dirs=["results", "debug_results", "__pycache__"],
                exclude_extensions=[".pyc", ".ipynb", ".swap"],
            )

        self.display_save(opt)

        if "sub" in opt.ctx_mode:
            assert opt.dset_name == "tvr", "sub is only supported for tvr dataset"

        if opt.hard_negtiave_start_epoch != -1:
            if opt.hard_pool_size > opt.bsz:
                print("[WARNING] hard_pool_size is larger than bsz")

        assert opt.stop_task in opt.eval_tasks_at_training
        # Derived paths — in test mode opt.results_dir comes from the saved
        # options reloaded above.
        opt.ckpt_filepath = os.path.join(opt.results_dir, self.ckpt_filename)
        opt.train_log_filepath = os.path.join(opt.results_dir,
                                              self.train_log_filename)
        opt.eval_log_filepath = os.path.join(opt.results_dir,
                                             self.eval_log_filename)
        opt.tensorboard_log_dir = os.path.join(opt.results_dir,
                                               self.tensorboard_log_dir)
        opt.device = torch.device(
            "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        opt.h5driver = None if opt.no_core_driver else "core"
        # num_workers > 1 will only work with "core" mode, i.e., memory-mapped hdf5
        opt.num_workers = 1 if opt.no_core_driver else opt.num_workers
        opt.pin_memory = not opt.no_pin_memory

        if "video" in opt.ctx_mode and opt.vid_feat_size > 3000:  # 3072, the normalized concatenation of resnet+i3d
            assert opt.no_norm_vfeat

        # Temporal endpoint feature (tef) appends 2 dims to each enabled stream.
        if "tef" in opt.ctx_mode and "video" in opt.ctx_mode:
            opt.vid_feat_size += 2
        if "tef" in opt.ctx_mode and "sub" in opt.ctx_mode:
            opt.sub_feat_size += 2

        if "video" not in opt.ctx_mode or "sub" not in opt.ctx_mode:
            # Cross-modal fusion needs both streams; disable it otherwise.
            opt.no_merge_two_stream = True
            opt.no_cross_att = True

        self.opt = opt
        return opt
Exemplo n.º 3
0
    def parse(self):
        """Parse CLI arguments and derive all runtime options.

        Training mode: requires ``--exp_id``, creates a timestamped results
        directory and snapshots the code into ``<results_dir>/code.zip``.
        Test mode (``self`` is a ``TestOptions``): reloads the options saved
        at training time from ``opt.model_dir``, keeping only a few
        eval-specific flags from the current command line.

        Returns:
            The fully post-processed options namespace (also stored on
            ``self.opt``).

        Raises:
            ValueError: in training mode when ``--exp_id`` is missing.
        """
        if not self.initialized:
            self.initialize()
        opt = self.parser.parse_args()

        if opt.debug:
            # Debug runs are redirected to a sibling "debug_results" dir, and
            # hdf5 "core" driver / worker processes are disabled so crashes
            # are easier to trace.
            opt.results_root = os.path.sep.join(
                opt.results_root.split(os.path.sep)[:-1] + [
                    "debug_results",
                ])
            opt.no_core_driver = True
            opt.num_workers = 0

        if isinstance(self, TestOptions):
            # modify model_dir to absolute path
            opt.model_dir = os.path.join(
                os.path.dirname(os.path.abspath(__file__)), "results",
                opt.model_dir)
            saved_options = load_json(
                os.path.join(opt.model_dir, self.saved_option_filename))
            # Use saved options to overwrite all BaseOptions args, except the
            # eval-time flags below (set gives O(1) membership tests).
            eval_time_args = {
                "results_root", "num_workers", "nms_thd", "debug",
                "eval_split_name", "eval_path", "use_intermediate",
                "external_inference_vr_res_path"
            }
            for arg in saved_options:
                if arg not in eval_time_args:
                    setattr(opt, arg, saved_options[arg])
            # opt.no_core_driver = True
        else:
            if opt.exp_id is None:
                # message grammar fixed (was "required for at a training option")
                raise ValueError(
                    "--exp_id is required for a training option!")

            if opt.clip_length is None:
                opt.clip_length = ProposalConfigs[opt.dset_name]["clip_length"]
            opt.results_dir = os.path.join(
                opt.results_root, "-".join([
                    opt.dset_name, opt.model_type, opt.ctx_mode, opt.exp_id,
                    time.strftime("%Y_%m_%d_%H_%M_%S")
                ]))
            mkdirp(opt.results_dir)
            # save a copy of current code
            code_dir = os.path.dirname(os.path.realpath(__file__))
            code_zip_filename = os.path.join(opt.results_dir, "code.zip")
            make_zipfile(
                code_dir,
                code_zip_filename,
                enclosing_dir="code",
                exclude_dirs_substring="results",
                exclude_dirs=["results", "debug_results", "__pycache__"],
                exclude_extensions=[".pyc", ".ipynb", ".swap"])

        self.save_args(opt)

        if "sub" in opt.ctx_mode:
            assert opt.dset_name == "tvr", "sub is only supported for tvr dataset"

        if "video" in opt.ctx_mode and opt.vid_feat_size > 3000:  # 3072, the normalized concatenation of resnet+i3d
            assert opt.no_norm_vfeat

        # Derived paths — in test mode opt.results_dir comes from the saved
        # options reloaded above.
        opt.ckpt_filepath = os.path.join(opt.results_dir, self.ckpt_filename)
        opt.train_log_filepath = os.path.join(opt.results_dir,
                                              self.train_log_filename)
        opt.eval_log_filepath = os.path.join(opt.results_dir,
                                             self.eval_log_filename)
        opt.tensorboard_log_dir = os.path.join(opt.results_dir,
                                               self.tensorboard_log_dir)
        opt.device = torch.device(
            "cuda:%d" % opt.device_ids[0] if opt.device >= 0 else "cpu")
        opt.h5driver = None if opt.no_core_driver else "core"
        # num_workers > 1 will only work with "core" mode, i.e., memory-mapped hdf5
        opt.pin_memory = not opt.no_pin_memory
        opt.num_workers = 1 if opt.no_core_driver else opt.num_workers

        # Display settings
        print("------------ Options -------------\n{}\n-------------------".
              format({str(k): str(v)
                      for k, v in sorted(vars(opt).items())}))
        self.opt = opt
        return opt