import os
import random
import warnings
from copy import deepcopy as dc
from os.path import join as pjoin
from typing import List, Optional, Union

import numpy as np
import pandas as pd
import torch
import yaml
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

# NOTE: project helpers (now, save_obj, merge_dicts, combine_fits, isfloat,
# _lda, _process_results, _compute_feature_importances) are assumed to be
# defined elsewhere in the codebase.


def save_model(
    model: nn.Module,
    comment: str,
    chkpt: int = -1,
):
    config_dict = vars(model.config)
    to_hash_dict_ = dc(config_dict)
    # hash the (key, value) pairs, not just the keys, so configs that differ
    # only in values get distinct hashes; assumes all values are hashable.
    # NOTE: builtin hash() is salted per process for str, so set
    # PYTHONHASHSEED (or use hashlib) if the directory name must be stable
    # across runs.
    hash_str = str(hash(frozenset(sorted(to_hash_dict_.items()))))

    save_dir = pjoin(
        model.config.base_dir,
        'saved_models',
        type(model).__name__,
        '{}_{}'.format(comment, hash_str),
        '{0:04d}'.format(chkpt),
    )
    os.makedirs(save_dir, exist_ok=True)
    bin_file = pjoin(save_dir, '{:s}.bin'.format(type(model).__name__))
    torch.save(model.state_dict(), bin_file)

    config_file = pjoin(save_dir,
                        '{:s}.yaml'.format(type(model.config).__name__))
    with open(config_file, 'w') as f:
        yaml.dump(config_dict, f)

    with open(pjoin(save_dir, '{}.txt'.format(now(exclude_hour_min=False))),
              'w') as f:
        f.write("chkpt {:d} saved".format(chkpt))
# Example #2
    def train(self,
              nb_epochs: Union[int, range],
              comment: str,
              save_chkpts: bool = True):
        assert isinstance(nb_epochs,
                          (int, range)), "Please provide either range or int"

        writer_dir = pjoin(
            self.train_config.runs_dir,
            type(self.model).__name__,
            "{}".format(now(exclude_hour_min=True)),
            "{}".format(comment),
        )
        self.writer = SummaryWriter(writer_dir)

        epochs_range = range(nb_epochs) if isinstance(nb_epochs,
                                                      int) else nb_epochs
        pbar = tqdm(epochs_range)
        for epoch in pbar:
            avg_loss = self.iteration(epoch=epoch)
            pbar.set_description('epoch # {:d}, avg loss: {:.3f}'.format(
                epoch + 1, avg_loss))
            if self.optim_schedule is not None:
                self.optim_schedule.step()

            if (epoch + 1) % self.train_config.chkpt_freq == 0 and save_chkpts:
                save_model(self.model, comment=comment, chkpt=epoch + 1)

            if (epoch + 1) % self.train_config.eval_freq == 0:
                nb_iters = len(self.dl_train)
                global_step = (epoch + 1) * nb_iters
                _ = self.validate(global_step, verbose=False)
                if self.dl_test is not None:
                    _ = self.test(global_step, verbose=False)
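
    # Usage sketch (this method lives on a trainer class whose attributes
    # self.model, self.train_config, self.dl_train, etc. are assumed to be
    # set up in __init__); nb_epochs may be an int or a range, e.g.:
    #
    #   trainer.train(nb_epochs=100, comment='baseline')
    #   trainer.train(nb_epochs=range(100, 200), comment='baseline_resumed')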
# Example #3
def save_model2(model, comment, chkpt=-1):
    config_dict = vars(model.config)
    to_hash_dict_ = dc(config_dict)
    # hash the (key, value) pairs, not just the keys, as in save_model above
    hash_str = str(hash(frozenset(sorted(to_hash_dict_.items()))))
    save_dir = pjoin(
        model.config.base_dir,
        'saved_models',
        type(model).__name__,
        '{}_{}'.format(comment, hash_str),
        '{0:03d}'.format(chkpt),
    )
    os.makedirs(save_dir, exist_ok=True)
    torch.save(model.state_dict(), pjoin(save_dir, 'model.bin'))

    if type(model.config).__name__ == 'VAEConfig':
        config_file = pjoin(save_dir, 'vae_config.yaml')
    elif type(model.config).__name__ == 'ReadoutConfig':
        config_file = pjoin(save_dir, 'readout_config.yaml')
    else:
        raise RuntimeError("invalid config type encountered")

    with open(config_file, 'w') as f:
        yaml.dump(config_dict, f)

    with open(pjoin(save_dir, '{}.txt'.format(now(exclude_hour_min=False))),
              'w') as f:
        f.write("chkpt {:d} saved".format(chkpt))
# Example #4
def run_lda_analysis(
    cm: str,
    load_file: str,
    results_dir: str,
    trial_types: List[str],
    shrinkage: Union[float, str] = 'auto',
    xv_fold: int = 5,
    random_state: int = 42,
    verbose: bool = True,
):

    random.seed(random_state)
    np.random.seed(random_state)
    rng = np.random.RandomState(random_state)

    lbl2idx = {lbl: i for (i, lbl) in enumerate(trial_types)}
    idx2lbl = {i: k for k, i in lbl2idx.items()}

    msg = "[INFO] running LDA analysis using shrinkage = '{}'\n"
    msg += "[INFO] cm: {}, class labels: {}\n"
    msg = msg.format(shrinkage, cm, trial_types)
    if verbose:
        print(msg)

    # save dir
    save_dir = pjoin(results_dir, 'lda', cm)
    os.makedirs(save_dir, exist_ok=True)

    fit_metadata = {
        'shrinkage': shrinkage,
        'lbl2idx': lbl2idx,
        'idx2lbl': idx2lbl,
        'save_dir': save_dir,
        'datetime': now(),
    }
    save_obj(fit_metadata, 'fit_metadata.npy', save_dir, 'np')

    for shuffle_labels in [False, True]:
        for dim in [1, 2, 3]:
            results, lda_dict = _lda(load_file, shrinkage, dim, xv_fold,
                                     lbl2idx, idx2lbl, rng, shuffle_labels,
                                     verbose)

            # save
            file_name = 'results_{:d}d_shuffled.df' if shuffle_labels else 'results_{:d}d.df'
            save_obj(results, file_name.format(dim), save_dir, 'df', verbose)
            file_name = 'extras_{:d}d_shuffled.pkl' if shuffle_labels else 'extras_{:d}d.pkl'
            save_obj(lda_dict, file_name.format(dim), save_dir, 'pkl', verbose)
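
# Usage sketch (hypothetical paths and trial types; `trial_types` fixes the
# label order used to build lbl2idx/idx2lbl, and results for each
# (shuffle, dim) combination land in results_dir/lda/<cm>/):
#
#   run_lda_analysis(
#       cm='deconvolved',
#       load_file='/path/to/processed_data.h5',
#       results_dir='/path/to/results',
#       trial_types=['hit', 'miss', 'correct_reject', 'false_alarm'],
#   )
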
def combine_results(
    run_dir: str,
    reg_detection_args: dict,
    regs_to_include: Optional[List[str]] = None,
    verbose: bool = True,
):
    # sort runs by increasing C value, i.e. decreasing regularization strength
    runs = next(os.walk(run_dir))[1]  # all immediate subdirectories
    runs = list(filter(isfloat, runs))  # keep only float-named run dirs
    runs = sorted(runs, key=lambda c: float(c))
    if regs_to_include is not None:
        if not isinstance(regs_to_include, list):
            regs_to_include = [regs_to_include]
        runs = [item for item in runs if item in regs_to_include]
    if len(runs) == 0:
        raise RuntimeError("data not found")

    if verbose:
        print("[PROGRESS] using fits: {}".format(runs))

    coeffs_dictlist = []
    performances_dictlist = []
    for x in tqdm(runs,
                  '[PROGRESS] combining previous fit data together',
                  disable=not verbose):
        load_dir = pjoin(run_dir, x)
        files = ['_coeffs.npy', '_performances.npy', '_classifiers.npy']
        listdir = os.listdir(load_dir)

        if not all(elem in listdir for elem in files):
            metadata = np.load(pjoin(load_dir, 'fit_metadata.npy'),
                               allow_pickle=True).item()
            combine_fits(metadata, verbose)

        _coeffs = np.load(pjoin(load_dir, '_coeffs.npy'),
                          allow_pickle=True).item()
        coeffs_dictlist.append(_coeffs)
        _performances = np.load(pjoin(load_dir, '_performances.npy'),
                                allow_pickle=True).item()
        performances_dictlist.append(_performances)

    coeffs = merge_dicts(coeffs_dictlist, verbose)
    performances = merge_dicts(performances_dictlist, verbose)

    performances, performances_filtered, coeffs_filtered = _process_results(
        performances, coeffs, reg_detection_args, verbose)

    # save
    time_now = now(exclude_hour_min=True)
    # NOTE: relies on reg_detection_args holding exactly the four values
    # consumed by this template, in insertion order
    save_dir = "time[{:d}:{:d}]_filter{:d}_thres{}"
    save_dir = save_dir.format(*reg_detection_args.values())
    save_dir = pjoin(run_dir, save_dir)
    os.makedirs(save_dir, exist_ok=True)

    save_obj(
        obj=pd.DataFrame.from_dict(coeffs),
        file_name="coeffs_{:s}.df".format(time_now),
        save_dir=save_dir,
        mode='df',
        verbose=verbose,
    )
    del coeffs
    save_obj(
        obj=pd.DataFrame.from_dict(performances),
        file_name="performances_{:s}.df".format(time_now),
        save_dir=save_dir,
        mode='df',
        verbose=verbose,
    )
    del performances
    save_obj(
        obj=pd.DataFrame.from_dict(performances_filtered),
        file_name="performances_filtered_{:s}.df".format(time_now),
        save_dir=save_dir,
        mode='df',
        verbose=verbose,
    )
    del performances_filtered

    # clfs
    classifiers = {}
    for x in tqdm(runs,
                  '[PROGRESS] combining previous classifiers together',
                  disable=not verbose):
        load_dir = pjoin(run_dir, x)
        files = ['_coeffs.npy', '_performances.npy', '_classifiers.npy']
        listdir = os.listdir(load_dir)

        if not all(elem in listdir for elem in files):
            metadata = np.load(pjoin(load_dir, 'fit_metadata.npy'),
                               allow_pickle=True).item()
            combine_fits(metadata, verbose)

        _classifiers = np.load(pjoin(load_dir, '_classifiers.npy'),
                               allow_pickle=True).item()
        assert not set(_classifiers.keys()).intersection(classifiers.keys()), \
            "runs must have non-overlapping classifier keys by design"
        classifiers.update(_classifiers)

    # suppress RuntimeWarnings raised during feature-importance computation
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", category=RuntimeWarning)
        coeffs_filtered = _compute_feature_importances(coeffs_filtered,
                                                       classifiers)

    # save
    save_obj(
        obj=pd.DataFrame.from_dict(coeffs_filtered),
        file_name="coeffs_filtered_{:s}.df".format(time_now),
        save_dir=save_dir,
        mode='df',
        verbose=verbose,
    )
    del coeffs_filtered
    save_obj(
        obj=classifiers,
        file_name="classifiers_{:s}.pkl".format(time_now),
        save_dir=save_dir,
        mode='pkl',
        verbose=verbose,
    )
    del classifiers
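
# Usage sketch: run_dir is expected to contain one subdirectory per
# regularization value C, named by its float value (e.g. '0.01', '0.1',
# '1.0'). The reg_detection_args keys below are hypothetical, but the dict
# must hold exactly the four values consumed by the save_dir template,
# in insertion order:
#
#   combine_results(
#       run_dir='/path/to/logreg_fits',
#       reg_detection_args={
#           'time_start': 30, 'time_end': 45,
#           'filter_sz': 5, 'threshold': 0.9,
#       },
#       regs_to_include=['0.01', '0.1'],
#   )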