Example #1
    def compute_fstar(self, model_func, loss_function, fname):
        if os.path.exists(fname):
            fstar_list = hu.load_pkl(fname)
        else:
            fstar_list = np.ones(len(self)) * -1

            for i in range(len(self)):
                batch = self[i]
                images, labels = batch['images'][None].cuda(), batch['labels'][None].cuda()

                model = model_func()
                opt = torch.optim.Adam(model.parameters())

                for j in range(10000):
                    opt.zero_grad()

                    closure = lambda : loss_function(model, images, labels, backwards=True)
                    loss = opt.step(closure).item()
                    
                    grad_current = sps.get_grad_list(model.parameters())
                    grad_norm = sps.compute_grad_norm(grad_current)

                    if np.isnan(loss):
                        print('nan')
                    # print(i, loss)
                    if grad_norm < 1e-6:
                        break
                    if j > 0 and abs(loss_old - loss) < 1e-6:
                        break
                    loss_old = loss
                print("%d/%d - converged:%d - %.6f"% (i, len(self), j, loss))
                fstar_list[i] = loss
            hu.save_pkl(fname, fstar_list)

        self.fstar_list = fstar_list
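
The closure-based step in compute_fstar relies on opt.step(closure) re-evaluating the loss and returning it. A minimal, self-contained sketch of that pattern in plain PyTorch (illustrative model and tensors; no CUDA or the hu/sps helpers needed):

import torch

model = torch.nn.Linear(10, 1)
opt = torch.optim.Adam(model.parameters())
x, y = torch.randn(32, 10), torch.randn(32, 1)

def closure():
    opt.zero_grad()
    loss = torch.nn.functional.mse_loss(model(x), y)
    loss.backward()  # plays the role of loss_function(..., backwards=True)
    return loss

loss = opt.step(closure).item()  # step() returns the loss computed by the closure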
Example #2
    def __init__(self, model, nclasses, exp_dict):
        """ Constructor
        Args:
            model: architecture to train
            nclasses: number of output classes
            exp_dict: reference to dictionary with the hyperparameters
        """
        super().__init__()
        self.model = model
        self.exp_dict = exp_dict 
        self.ngpu = self.exp_dict["ngpu"]

        self.embedding_propagation = EmbeddingPropagation()
        self.label_propagation = LabelPropagation()
        self.model.add_classifier(nclasses, modalities=0)
        self.nclasses = nclasses

        if self.exp_dict["rotation_weight"] > 0:
            self.model.add_classifier(4, "classifier_rot")

        best_accuracy = -1 
        if self.exp_dict["pretrained_weights_root"] is not None:
            for exp_hash in os.listdir(self.exp_dict['pretrained_weights_root']):
                base_path = os.path.join(self.exp_dict['pretrained_weights_root'], exp_hash)
                exp_dict_path = os.path.join(base_path, 'exp_dict.json')
                if not os.path.exists(exp_dict_path):
                    continue
                loaded_exp_dict = haven.load_json(exp_dict_path)
                pkl_path = os.path.join(base_path, 'score_list_best.pkl')
                if (loaded_exp_dict["model"]["name"] == 'pretraining' and 
                        loaded_exp_dict["dataset_train"].split('_')[-1] == exp_dict["dataset_train"].split('_')[-1] and 
                        loaded_exp_dict["model"]["backbone"] == exp_dict['model']["backbone"] and
                        # loaded_exp_dict["labelprop_alpha"] == exp_dict["labelprop_alpha"] and
                        # loaded_exp_dict["labelprop_scale"] == exp_dict["labelprop_scale"] and
                        os.path.exists(pkl_path)):
                    accuracy = haven.load_pkl(pkl_path)[-1]["val_accuracy"]
                    try:
                        self.model.load_state_dict(torch.load(os.path.join(base_path, 'checkpoint_best.pth'))['model'], strict=False)
                        if accuracy > best_accuracy:
                            best_path = os.path.join(base_path, 'checkpoint_best.pth')
                            best_accuracy = accuracy
                    except Exception:
                        continue
            assert(best_accuracy > 0.1)
            print("Finetuning %s with original accuracy : %f" %(base_path, best_accuracy))
            self.model.load_state_dict(torch.load(best_path)['model'], strict=False)

        # Add optimizers here
        self.optimizer = torch.optim.SGD(self.model.parameters(), 
                                            lr=self.exp_dict["lr"],
                                            momentum=0.9,
                                            weight_decay=self.exp_dict["weight_decay"], 
                                            nesterov=True)
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer,
                                                                    mode="min" if "loss" in self.exp_dict["target_loss"] else "max",
                                                                    patience=self.exp_dict["patience"])
        self.model.cuda()
        if self.ngpu > 1:
            self.parallel_model = torch.nn.DataParallel(self.model, device_ids=list(range(self.ngpu)))
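
The scheduler above switches between "min" and "max" depending on whether the monitored target is a loss or a score. A short sketch of how such a ReduceLROnPlateau scheduler is typically driven (placeholder metric; in the constructor above the real value comes from validation):

import torch

net = torch.nn.Linear(4, 2)
optimizer = torch.optim.SGD(net.parameters(), lr=0.1, momentum=0.9, nesterov=True)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="max", patience=10)

for epoch in range(3):
    val_accuracy = 0.5  # placeholder metric
    scheduler.step(val_accuracy)  # the LR drops after `patience` epochs without improvement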
Example #3
    def __init__(
        self,
        split,
        datadir,
        exp_dict,
    ):
        self.exp_dict = exp_dict
        self.datadir = datadir
        self.split = split
        self.n_classes = 5

        self.img_path = os.path.join(datadir, 'OpenSourceDCMs')
        self.lung_path = os.path.join(datadir, 'LungMasks')
        self.tgt_path = os.path.join(datadir, 'InfectionMasks')

        self.img_tgt_dict = []
        for tgt_name in os.listdir(self.tgt_path):
            lung_name = os.path.join(self.lung_path, tgt_name)
            scan_id, slice_id = tgt_name.split('_')
            slice_id = str(int(slice_id.replace('z', '').replace('.png',
                                                                 ''))).zfill(4)
            img_name = [
                f for f in os.listdir(
                    os.path.join(self.img_path, 'DCM' + scan_id))
                if 's%s' % slice_id in f
            ][0]
            img_name = os.path.join('DCM' + scan_id, img_name)

            self.img_tgt_dict += [{
                'img': img_name,
                'tgt': tgt_name,
                'lung': lung_name
            }]

        # get label_meta
        fname = os.path.join(datadir, 'tmp', 'labels_array.pkl')
        if not os.path.exists(fname):
            labels_array = np.zeros((len(self.img_tgt_dict), 3))
            for i, idict in enumerate(tqdm.tqdm(self.img_tgt_dict)):
                img_name, tgt_name = idict['img'], idict['tgt']
                mask = np.array(
                    Image.open(os.path.join(self.tgt_path, tgt_name)))
                uniques = np.unique(mask)
                if 0 in uniques:
                    labels_array[i, 0] = 1
                if 127 in uniques:
                    labels_array[i, 1] = 1
                if 255 in uniques:
                    labels_array[i, 2] = 1
            hu.save_pkl(fname, labels_array)

        labels_array = hu.load_pkl(fname)
        # self.np.where(labels_array[:,1:].max(axis=1))
        ind_list = np.where(labels_array[:, 1:].max(axis=1))[0]
        self.img_tgt_dict = np.array(self.img_tgt_dict)[ind_list]
        if split == 'train':
            self.img_tgt_dict = self.img_tgt_dict[:300]
        elif split == 'val':
            self.img_tgt_dict = self.img_tgt_dict[300:]
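
The slice filter at the end keeps only images whose infection mask contains the 127 or 255 label, by taking a row-wise max over the last two columns of labels_array. A tiny numpy illustration of that indexing step (toy array, illustrative only):

import numpy as np

labels_array = np.array([[1, 0, 0],   # background only -> dropped
                         [1, 1, 0],   # contains label 127 -> kept
                         [1, 0, 1]])  # contains label 255 -> kept
ind_list = np.where(labels_array[:, 1:].max(axis=1))[0]
print(ind_list)  # [1 2]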
Example #4
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    if not os.path.exists(os.path.join(savedir, "exp_dict.json")):
        hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
        print("Experiment saved in %s" % savedir)

    # BCD train
    # ==================
    # Ignore the following combinations
    if not ut.is_valid_exp(exp_dict):
        return

    score_list_fname = os.path.join(savedir, 'score_list.pkl')
    if os.path.exists(score_list_fname):
        score_list = hu.load_pkl(score_list_fname)

    else:
        score_list = train(dataset_name=exp_dict['dataset']['name'],
                           loss_name=exp_dict['dataset']['loss'],
                           block_size=exp_dict['block_size'],
                           partition_rule=exp_dict['partition'],
                           selection_rule=exp_dict['selection'],
                           update_rule=exp_dict['update'],
                           n_iters=exp_dict['max_iters'],
                           L1=exp_dict.get('l1', 0),
                           L2=0,
                           datasets_path=datadir)

        hu.save_pkl(score_list_fname, score_list)

    print('Experiment completed.')
    return score_list
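
All of these trainval entry points follow the same bookkeeping pattern: the experiment dictionary is hashed into a unique save directory so that reruns of the same configuration resume from the same folder. A rough, self-contained sketch of that idea (hashlib-based stand-in; the real hu.hash_dict may differ in detail):

import hashlib
import json
import os

def hash_dict(exp_dict):
    # deterministic id for an experiment configuration (illustrative only)
    return hashlib.md5(json.dumps(exp_dict, sort_keys=True).encode()).hexdigest()

exp_dict = {'dataset': {'name': 'syn', 'loss': 'logistic'}, 'block_size': 5}
savedir = os.path.join('/tmp/results', hash_dict(exp_dict))
os.makedirs(savedir, exist_ok=True)
print(savedir)
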
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # ==================
    # train set
    train_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                     split="train",
                                     datadir=datadir,
                                     exp_dict=exp_dict,
                                     dataset_size=exp_dict['dataset_size'])
    # val set
    val_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                   split="val",
                                   datadir=datadir,
                                   exp_dict=exp_dict,
                                   dataset_size=exp_dict['dataset_size'])

    # test set
    test_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                    split="test",
                                    datadir=datadir,
                                    exp_dict=exp_dict,
                                    dataset_size=exp_dict['dataset_size'])

    # val_sampler = torch.utils.data.SequentialSampler(val_set)
    val_loader = DataLoader(
        val_set,
        # sampler=val_sampler,
        batch_size=1,
        collate_fn=ut.collate_fn,
        num_workers=num_workers)
    test_loader = DataLoader(
        test_set,
        # sampler=val_sampler,
        batch_size=1,
        collate_fn=ut.collate_fn,
        num_workers=num_workers)

    # Model
    # ==================
    model = models.get_model(model_dict=exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    # model.opt = optimizers.get_optim(exp_dict['opt'], model)
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))
    model.waiting = 0
    model.val_score_best = -np.inf

    train_sampler = torch.utils.data.RandomSampler(train_set,
                                                   replacement=True,
                                                   num_samples=2 *
                                                   len(test_set))

    train_loader = DataLoader(train_set,
                              sampler=train_sampler,
                              collate_fn=ut.collate_fn,
                              batch_size=exp_dict["batch_size"],
                              drop_last=True,
                              num_workers=num_workers)

    for e in range(s_epoch, exp_dict['max_epoch']):
        # Validate only at the start of each cycle
        score_dict = {}
        test_dict = model.val_on_loader(test_loader,
                                        savedir_images=os.path.join(
                                            savedir, "images"),
                                        n_images=3)
        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate the model
        val_dict = model.val_on_loader(val_loader)
        score_dict["val_score"] = val_dict["val_score"]

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = e
        score_dict["waiting"] = model.waiting

        model.waiting += 1

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Save Best Checkpoint
        score_df = pd.DataFrame(score_list)
        if score_dict["val_score"] >= model.val_score_best:
            test_dict = model.val_on_loader(test_loader,
                                            savedir_images=os.path.join(
                                                savedir, "images"),
                                            n_images=3)
            score_dict.update(test_dict)
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            # score_df.to_csv(os.path.join(savedir, "score_best_df.csv"))
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            model.waiting = 0
            model.val_score_best = score_dict["val_score"]
            print("Saved Best: %s" % savedir)

        # Report & Save
        score_df = pd.DataFrame(score_list)
        # score_df.to_csv(os.path.join(savedir, "score_df.csv"))
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        if model.waiting > 100:
            break

    print('Experiment completed at epoch %d' % e)
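
The train loader above uses a RandomSampler with replacement and a fixed num_samples, which caps how many samples one "epoch" of training draws regardless of the dataset size. A small sketch of that effect with a toy TensorDataset:

import torch
from torch.utils.data import DataLoader, RandomSampler, TensorDataset

train_set = TensorDataset(torch.randn(1000, 3), torch.zeros(1000))
sampler = RandomSampler(train_set, replacement=True, num_samples=200)
loader = DataLoader(train_set, sampler=sampler, batch_size=50, drop_last=True)
print(len(loader))  # 4 batches per epoch, independent of the 1000-sample dataset
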
def trainval(exp_dict, savedir_base, data_root, reset=False, tensorboard=True):
    # bookkeeping
    # ---------------
    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    np.random.seed(exp_dict["seed"])
    torch.manual_seed(exp_dict["seed"])

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    writer = tensorboardX.SummaryWriter(savedir) \
        if tensorboard == 1 else None

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    pprint.pprint(exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # -----------
    train_dataset, val_dataset = get_dataset(['train', 'val'], data_root,
                                             exp_dict)
    # val_dataset = get_dataset('val', exp_dict)

    # train and val loader
    if exp_dict["episodic"] == False:
        train_loader = DataLoader(train_dataset,
                                  batch_size=exp_dict['batch_size'],
                                  shuffle=True,
                                  num_workers=args.num_workers)
        val_loader = DataLoader(val_dataset,
                                batch_size=exp_dict['batch_size'],
                                shuffle=True,
                                num_workers=args.num_workers)
    else:  # to support episodes TODO: move inside each model
        from datasets.episodic_dataset import EpisodicDataLoader
        train_loader = EpisodicDataLoader(train_dataset,
                                          batch_size=exp_dict['batch_size'],
                                          shuffle=True,
                                          collate_fn=lambda x: x,
                                          num_workers=args.num_workers)
        val_loader = EpisodicDataLoader(val_dataset,
                                        batch_size=exp_dict['batch_size'],
                                        shuffle=True,
                                        collate_fn=lambda x: x,
                                        num_workers=args.num_workers)

    # Model
    # -----------
    model = get_model(exp_dict,
                      labelset=train_dataset.raw_labelset,
                      writer=writer)
    print("Model with:",
          sum(p.numel() for p in model.parameters() if p.requires_grad),
          "parameters")

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print("Starting experiment at epoch %d" % (s_epoch))

    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Train the model
        score_dict.update(model.train_on_loader(e, train_loader))

        # Validate the model
        score_dict.update(model.val_on_loader(e, val_loader))
        score_dict["epoch"] = e

        if tensorboard:
            for key, value in score_dict.items():
                writer.add_scalar(key, value, e)
            writer.flush()
        # Visualize the model
        # model.vis_on_loader(vis_loader, savedir=savedir+"/images/")

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # if model.is_end():
        #     print("Early stopping")
        #     break
    print('experiment completed')

    # Cleanup
    if tensorboard == 1:
        writer.close()
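
This variant logs every entry of score_dict to TensorBoard through tensorboardX. A minimal sketch of that logging step on its own (assumes tensorboardX is installed; the log directory is illustrative):

import tensorboardX

writer = tensorboardX.SummaryWriter('/tmp/tb_demo')
score_dict = {'train_loss': 0.7, 'val_accuracy': 0.4}
for key, value in score_dict.items():
    writer.add_scalar(key, value, global_step=0)
writer.flush()
writer.close()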
Example #7
def trainval(exp_dict, savedir_base, datadir_base, reset=False):
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================

    # load train and active set
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     datadir_base=datadir_base,
                                     exp_dict=exp_dict)

    active_set = ActiveLearningDataset(train_set, random_state=42)

    # val set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   datadir_base=datadir_base,
                                   exp_dict=exp_dict)
    val_loader = DataLoader(val_set, batch_size=exp_dict["batch_size"])

    # Model
    # ==================
    model = models.get_model(model_name=exp_dict['model']['name'],
                             exp_dict=exp_dict).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        active_set.load_state_dict(
            hu.load_pkl(os.path.join(savedir, "active_set.pkl")))
        score_list = hu.load_pkl(score_list_path)
        inner_s_epoch = score_list[-1]['inner_epoch'] + 1
        s_cycle = score_list[-1]['cycle']
    else:
        # restart experiment
        score_list = []
        inner_s_epoch = 0
        s_cycle = 0

    # Train & Val
    # ==================
    print("Starting experiment at cycle %d epoch %d" %
          (s_cycle, inner_s_epoch))

    for c in range(s_cycle, exp_dict['max_cycle']):
        # Set seed
        np.random.seed(c)
        torch.manual_seed(c)
        torch.cuda.manual_seed_all(c)

        if inner_s_epoch == 0:
            active_set.label_next_batch(model)
            hu.save_pkl(os.path.join(savedir, "active_set.pkl"),
                        active_set.state_dict())

        train_loader = DataLoader(active_set,
                                  sampler=samplers.get_sampler(
                                      exp_dict['sampler']['train'],
                                      active_set),
                                  batch_size=exp_dict["batch_size"])
        # Visualize the model
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))

        for e in range(inner_s_epoch, exp_dict['max_epoch']):
            # Validate only at the start of each cycle
            score_dict = {}
            if e == 0:
                score_dict.update(model.val_on_loader(val_loader))

            # Train the model
            score_dict.update(model.train_on_loader(train_loader))

            # Validate the model
            score_dict["epoch"] = len(score_list)
            score_dict["inner_epoch"] = e
            score_dict["cycle"] = c
            score_dict['n_ratio'] = active_set.n_labelled_ratio
            score_dict["n_train"] = len(train_loader.dataset)
            score_dict["n_pool"] = len(train_loader.dataset.pool)

            # Add to score_list and save checkpoint
            score_list += [score_dict]

            # Report & Save
            score_df = pd.DataFrame(score_list)
            print("\n", score_df.tail(), "\n")
            hu.torch_save(model_path, model.get_state_dict())
            hu.save_pkl(score_list_path, score_list)
            print("Checkpoint Saved: %s" % savedir)

        inner_s_epoch = 0
Example #8
def get_dataset(dataset_name, train_flag, datadir, exp_dict):
    if dataset_name == "mnist":
        dataset = torchvision.datasets.MNIST(datadir, train=train_flag,
                               download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.5,), (0.5,))
                               ]))

    if dataset_name == "cifar10":
        if train_flag:
            transform_function = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                    (0.2023, 0.1994, 0.2010)),
            ])
        else:
            transform_function = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                ])

        dataset = torchvision.datasets.CIFAR10(
            root=datadir,
            train=train_flag,
            download=True,
            transform=transform_function)

    if dataset_name == "cifar100":
        if train_flag:
            transform_function = transforms.Compose([
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                transforms.ToTensor(),
                transforms.Normalize((0.4914, 0.4822, 0.4465),
                                    (0.2023, 0.1994, 0.2010)),
            ])
        else:
            transform_function = transforms.Compose([
                    transforms.ToTensor(),
                    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
                ])

        dataset = torchvision.datasets.CIFAR100(
            root=datadir,
            train=train_flag,
            download=True,
            transform=transform_function)

    if dataset_name in ['syn']:
        bias = 1
        scaling = 10
        sparsity = 10
        solutionSparsity = 0.1

        n = 1000
        p = 100

        A = np.random.randn(n, p) + bias
        A = A.dot(np.diag(scaling * np.random.randn(p)))
        A = A * (np.random.rand(n, p) < (sparsity * np.log(n) / n))
        w = np.random.randn(p) * (np.random.rand(p) < solutionSparsity)

        b = np.sign(A.dot(w))
        b = b * np.sign(np.random.rand(n) - 0.1)
        labels = np.unique(b)
        A = A / np.linalg.norm(A, axis=1)[:, None].clip(min=1e-6)
        A = A * 2
        b[b == labels[0]] = 0
        b[b == labels[1]] = 1

        dataset = torch.utils.data.TensorDataset(torch.FloatTensor(A), torch.FloatTensor(b))

        return DatasetWrapper(dataset)

    if dataset_name in ['mushrooms', 'w8a', 'rcv1', 'ijcnn']:
        sigma_dict = {"mushrooms": 0.5,
                      "w8a":20.0,
                      "rcv1":0.25 ,
                      "ijcnn":0.05}

        X, y = load_libsvm(dataset_name, data_dir=datadir)

        labels = np.unique(y)

        y[y==labels[0]] = 0
        y[y==labels[1]] = 1
        # splits used in experiments
        splits = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=9513451)
        X_train, X_test, Y_train, Y_test = splits

        if train_flag:
            # fname_rbf = "%s/rbf_%s_%s_train.pkl" % (datadir, dataset_name, sigma_dict[dataset_name])
            fname_rbf = "%s/rbf_%s_%s_train.npy" % (datadir, dataset_name, sigma_dict[dataset_name])
            if os.path.exists(fname_rbf):
                k_train_X = np.load(fname_rbf)
            else:
                k_train_X = rbf_kernel(X_train, X_train, sigma_dict[dataset_name])
                np.save(fname_rbf, k_train_X)
                print('%s saved' % fname_rbf)

            X_train = k_train_X
            X_train = torch.FloatTensor(X_train)
            Y_train = torch.FloatTensor(Y_train)

            dataset = torch.utils.data.TensorDataset(X_train, Y_train)

        else:
            fname_rbf = "%s/rbf_%s_%s_test.npy" % (datadir, dataset_name, sigma_dict[dataset_name])
            if os.path.exists(fname_rbf):
                k_test_X = np.load(fname_rbf)
            else:
                k_test_X = rbf_kernel(X_test, X_train, sigma_dict[dataset_name])
                # hu.save_pkl(fname_rbf, k_test_X)
                np.save(fname_rbf, k_test_X)
                print('%s saved' % fname_rbf)

            X_test = k_test_X
            X_test = torch.FloatTensor(X_test)
            Y_test = torch.FloatTensor(Y_test)

            dataset = torch.utils.data.TensorDataset(X_test, Y_test)

    if dataset_name == "matrix_fac":
        fname = datadir + 'matrix_fac.pkl'
        if not os.path.exists(fname):
            data = generate_synthetic_matrix_factorization_data()
            hu.save_pkl(fname, data)

        A, y = hu.load_pkl(fname)

        X_train, X_test, y_train, y_test = train_test_split(A, y, test_size=0.2, random_state=9513451)

        training_set = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.float))
        test_set = torch.utils.data.TensorDataset(torch.tensor(X_test, dtype=torch.float), torch.tensor(y_test, dtype=torch.float))

        if train_flag:
            dataset = training_set
        else:
            dataset = test_set

    return DatasetWrapper(dataset)
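
For the libsvm datasets, the raw features are replaced by an RBF kernel matrix against the training set. A hedged numpy sketch of what an rbf_kernel(A, B, sigma) helper like the one above computes; the exact bandwidth convention of the original helper is an assumption here:

import numpy as np

def rbf_kernel(A, B, sigma):
    # pairwise squared distances between rows of A and rows of B (convention assumed)
    d2 = ((A[:, None, :] - B[None, :, :]) ** 2).sum(-1)
    return np.exp(-d2 / (2.0 * sigma ** 2))

X_train = np.random.randn(5, 3)
K = rbf_kernel(X_train, X_train, sigma=0.5)
print(K.shape)  # (5, 5); these kernel rows become the new training features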
Example #9
def get_dataset(dataset_name, train_flag, datadir, exp_dict):
    if dataset_name == "mnist":
        dataset = torchvision.datasets.MNIST(datadir, train=train_flag,
                               download=True,
                               transform=torchvision.transforms.Compose([
                                   torchvision.transforms.ToTensor(),
                                   torchvision.transforms.Normalize(
                                       (0.5,), (0.5,))
                               ]))

    if dataset_name == "cifar10":
        transform_function = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])

        dataset = torchvision.datasets.CIFAR10(
            root=datadir,
            train=train_flag,
            download=True,
            transform=transform_function)

    if dataset_name == "cifar100":
        transform_function = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.4914, 0.4822, 0.4465),
                                 (0.2023, 0.1994, 0.2010)),
        ])

        dataset = torchvision.datasets.CIFAR100(
            root=datadir,
            train=train_flag,
            download=True,
            transform=transform_function)

    if dataset_name in ["mushrooms", "w8a",
                        "rcv1", "ijcnn"]:

        sigma_dict = {"mushrooms": 0.5,
                      "w8a":20.0,
                      "rcv1":0.25 ,
                      "ijcnn":0.05}

        X, y = load_libsvm(dataset_name, data_dir=datadir)

        labels = np.unique(y)

        y[y==labels[0]] = 0
        y[y==labels[1]] = 1
        # TODO: (amishkin) splits = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=9513451)
        splits = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)
        X_train, X_test, Y_train, Y_test = splits


        if train_flag:
            # fname_rbf = "%s/rbf_%s_train.pkl" % (datadir, dataset_name)

            # if os.path.exists(fname_rbf):
            #     k_train_X = hu.load_pkl(fname_rbf)
            # else:
            k_train_X = rbf_kernel(X_train, X_train, sigma_dict[dataset_name])
                # hu.save_pkl(fname_rbf, k_train_X)

            X_train = k_train_X
            X_train = torch.FloatTensor(X_train)
            Y_train = torch.FloatTensor(Y_train)

            dataset = torch.utils.data.TensorDataset(X_train, Y_train)

        else:
            # fname_rbf = "%s/rbf_%s_test.pkl" % (datadir, dataset_name)
            # if os.path.exists(fname_rbf):
            #     k_test_X = hu.load_pkl(fname_rbf)
            # else:
            k_test_X = rbf_kernel(X_test, X_train, sigma_dict[dataset_name])
                # hu.save_pkl(fname_rbf, k_test_X)

            X_test = k_test_X
            X_test = torch.FloatTensor(X_test)
            Y_test = torch.FloatTensor(Y_test)

            dataset = torch.utils.data.TensorDataset(X_test, Y_test)

        return dataset

    if dataset_name == "synthetic":
        margin = exp_dict["margin"]

        X, y, _, _ = make_binary_linear(n=exp_dict["n_samples"],
                                        d=exp_dict["d"],
                                        margin=margin,
                                        y01=True,
                                        bias=True,
                                        separable=True,
                                        seed=42)
        # No shuffling to keep the support vectors inside the training set
        splits = train_test_split(X, y, test_size=0.2, shuffle=False, random_state=42)
        X_train, X_test, Y_train, Y_test = splits

        X_train = torch.FloatTensor(X_train)
        X_test = torch.FloatTensor(X_test)

        Y_train = torch.FloatTensor(Y_train)
        Y_test = torch.FloatTensor(Y_test)

        if train_flag:
            dataset = torch.utils.data.TensorDataset(X_train, Y_train)
        else:
            dataset = torch.utils.data.TensorDataset(X_test, Y_test)

        return dataset

    if dataset_name == "matrix_fac":
        fname = datadir + 'matrix_fac.pkl'
        if not os.path.exists(fname):
            data = generate_synthetic_matrix_factorization_data()
            hu.save_pkl(fname, data)

        A, y = hu.load_pkl(fname)

        X_train, X_test, y_train, y_test = train_test_split(A, y, test_size=0.2, random_state=9513451)

        training_set = torch.utils.data.TensorDataset(torch.tensor(X_train, dtype=torch.float), torch.tensor(y_train, dtype=torch.float))
        test_set = torch.utils.data.TensorDataset(torch.tensor(X_test, dtype=torch.float), torch.tensor(y_test, dtype=torch.float))

        if train_flag:
            dataset = training_set
        else:
            dataset = test_set

    return dataset
Example #10
def trainval(exp_dict, savedir_base, datadir_base, reset=False, 
            num_workers=0, pin_memory=False, ngpu=1, cuda_deterministic=False):
    # bookkeeping
    # ==================

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    if DEVICE.type == "cuda":
        if cuda_deterministic:
            cudnn.benchmark = False
            cudnn.deterministic = True
        else:
            cudnn.benchmark = True

    # Dataset
    # ==================
    trainset = get_dataset(exp_dict['dataset'], 'train',
                           exp_dict=exp_dict, datadir_base=datadir_base,
                           n_samples=exp_dict['dataset_size']['train'],
                           transform_lvl=exp_dict['dataset']['transform_lvl'],
                           colorjitter=exp_dict['dataset'].get('colorjitter')
                           )

    valset = get_dataset(exp_dict['dataset'], 'validation',
                         exp_dict=exp_dict, datadir_base=datadir_base,
                         n_samples=exp_dict['dataset_size']['train'],
                         transform_lvl=0,
                         val_transform=exp_dict['dataset']['val_transform'])

    testset = get_dataset(exp_dict['dataset'], 'test',
                          exp_dict=exp_dict, datadir_base=datadir_base,
                          n_samples=exp_dict['dataset_size']['test'],
                          transform_lvl=0,
                          val_transform=exp_dict['dataset']['val_transform'])
    print("Dataset defined.")

    # define dataloaders
    if exp_dict['dataset']['name'] == 'bach':
        testloader = torch.utils.data.DataLoader(testset, batch_size=1,
                                                 shuffle=False,
                                                 num_workers=num_workers,
                                                 pin_memory=pin_memory)
    else:
        testloader = torch.utils.data.DataLoader(testset, batch_size=exp_dict['batch']['size'],
                                                 shuffle=False,
                                                 num_workers=num_workers,
                                                 pin_memory=pin_memory)

    print("Testloader  defined.")

    # Model
    # ==================
    model = get_model(exp_dict, trainset, device=DEVICE)

    print("Model loaded")

    model_path = os.path.join(savedir, 'model.pth')
    model_best_path = os.path.join(savedir, 'model_best.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    # checkpoint management
    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = len(score_list)
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # define and log random seed for reproducibility
    assert('fixedSeed' in exp_dict)
    seed = exp_dict['fixedSeed']

    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    print("Seed defined.")

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d/%d" % (s_epoch, exp_dict['niter']))

    for epoch in range(s_epoch, exp_dict['niter']):
        s_time = time.time()
        # Sample new train val
        trainloader, valloader = get_train_val_dataloader(exp_dict,
                                                          trainset, valset,
                                                          mixtrainval=exp_dict['mixTrainVal'],
                                                          num_workers=num_workers,
                                                          pin_memory=pin_memory)
        # Train & validate
        train_dict = model.train_on_loader(trainloader, valloader, epoch=epoch,
                                           exp_dict=exp_dict)

        # Test phase
        train_dict_2 = model.test_on_loader(trainloader)
        val_dict = model.test_on_loader(valloader)
        test_dict = model.test_on_loader(testloader)

        # Vis phase
        model.vis_on_loader('train', trainset, savedir_images=os.path.join(
            savedir, 'images'), epoch=epoch)

        score_dict = {}
        score_dict["epoch"] = epoch
        score_dict["test_acc"] = test_dict['acc']
        score_dict["val_acc"] = val_dict['acc']
        score_dict["train_acc"] = train_dict_2['acc']
        score_dict["train_loss"] = train_dict['loss']
        score_dict["time_taken"] = time.time() - s_time
        score_dict["netC_lr"] = train_dict['netC_lr']

        if exp_dict['model']['netA'] is not None:
            if 'transformations_mean' in train_dict:
                for i in range(len(train_dict['transformations_mean'])):
                    score_dict[str(
                        i) + "_mean"] = train_dict['transformations_mean'][i].item()
            if 'transformations_std' in train_dict:
                for i in range(len(train_dict['transformations_std'])):
                    score_dict[str(
                        i) + "_std"] = train_dict['transformations_std'][i].item()

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Update best score
        if epoch == 0 or (score_dict["test_acc"] >= score_df["test_acc"][:-1].max()):
            hu.save_pkl(os.path.join(
                savedir, "score_list_best.pkl"), score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())

            print("Saved Best: %s" % savedir)

    print('experiment completed')
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
    # bookkeeping stuff
    # ==================

    savedir = os.path.join(savedir_base, hu.hash_dict(exp_dict))
    os.makedirs(savedir, exist_ok=True)

    if reset:
        hc.delete_and_backup_experiment(savedir)

    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # train set

    data_transform = A.Compose(
        [
            A.Flip(p=0.3),
            A.IAAAffine(p=0.3),
            A.Rotate(p=0.3),
            A.HueSaturationValue(hue_shift_limit=10,
                                 sat_shift_limit=15,
                                 val_shift_limit=10,
                                 p=0.3),
            A.GaussianBlur(3, p=0.3),
            A.GaussNoise(30, p=0.3)
        ],
        keypoint_params=A.KeypointParams(format='xy'),
        additional_targets={
            'mask0': 'mask',
            'mask1': 'mask',
            'mask2': 'mask',
            'keypoints0': 'keypoints',
            'keypoints1': 'keypoints',
            'keypoints2': 'keypoints',
            'keypoints3': 'keypoints',
            'keypoints4': 'keypoints',
            'keypoints5': 'keypoints'
        })

    # random.seed(20201009)
    random_seed = random.randint(0, 20201009)
    train_set = HEDataset_Fast(data_dir=datadir,
                               n_classes=exp_dict["n_classes"],
                               transform=data_transform,
                               option="Train",
                               random_seed=random_seed,
                               obj_option=exp_dict["obj"],
                               patch_size=exp_dict["patch_size"],
                               bkg_option=exp_dict["bkg"])

    test_transform = A.Compose([A.Resize(1024, 1024)],
                               keypoint_params=A.KeypointParams(format='xy'),
                               additional_targets={
                                   'mask0': 'mask',
                                   'mask1': 'mask'
                               })
    # val set
    val_set = HEDataset(data_dir=datadir,
                        transform=test_transform,
                        option="Validation")

    val_loader = DataLoader(val_set, batch_size=1, num_workers=num_workers)

    # test set
    test_set = HEDataset(data_dir=datadir,
                         transform=test_transform,
                         option="Test")

    test_loader = DataLoader(test_set, batch_size=1, num_workers=num_workers)
    # Model
    # ==================

    # torch.manual_seed(20201009)
    model = models.get_model(exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))

    #     train_sampler = torch.utils.data.RandomSampler(
    #         train_set, replacement=True, num_samples=2*len(val_set))

    train_loader = DataLoader(train_set,
                              batch_size=exp_dict["batch_size"],
                              shuffle=True,
                              num_workers=num_workers)

    for e in range(s_epoch, exp_dict['max_epoch']):
        # Validate only at the start of each cycle
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate and Visualize the model
        val_dict = model.val_on_loader(val_loader,
                                       savedir_images=os.path.join(
                                           savedir, "images"),
                                       n_images=7)
        score_dict.update(val_dict)

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = len(score_list)

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Save Best Checkpoint
        if e == 0 or (score_dict.get("val_score", 0) >
                      score_df["val_score"][:-1].fillna(0).max()):
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

    # if s_epoch==exp_dict['max_epoch']:
    #     e = s_epoch
    model.load_state_dict(
        hu.torch_load(os.path.join(savedir, "model_best.pth")))
    test_dict = model.test_on_loader(test_loader)
    hu.save_pkl(os.path.join(savedir, 'test_iou.pkl'), test_dict)
    print('Test IoU:{}'.format(test_dict["test_iou"]))
    print('Experiment completed at epoch %d' % e)
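
The albumentations pipeline above registers extra masks and keypoint sets through additional_targets, so one call transforms the image and all auxiliary targets consistently. A small usage sketch with a single extra mask (toy arrays; a Resize-only pipeline stands in for the full augmentation list):

import numpy as np
import albumentations as A

transform = A.Compose(
    [A.Resize(256, 256)],
    keypoint_params=A.KeypointParams(format='xy'),
    additional_targets={'mask0': 'mask'})

img = np.zeros((512, 512, 3), dtype=np.uint8)
extra_mask = np.zeros((512, 512), dtype=np.uint8)
out = transform(image=img, mask0=extra_mask, keypoints=[(10.0, 20.0)])
print(out['image'].shape, out['mask0'].shape, out['keypoints'])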
Example #12
    def __getitem__(self, index):
        # index = 0
        img_path = self.dataset.images[index]
        name = os.path.split(img_path)[-1].split('.')[0]

        img_pil = Image.open(img_path).convert("RGB")
        W, H = img_pil.size
        points_list = self.point_dict[name]
        points_mask = np.zeros((H, W))
        for p in points_list:
            if p['y'] >= H or p['x'] >= W:
                continue
            points_mask[int(p['y']), int(p['x'])] = p['cls']

        if self.supervision == 'full':
            mask_path = self.dataset.masks[index]
            if '.mat' in mask_path:
                mask_pil = Image.fromarray(
                    hu.load_mat(mask_path)['GTcls'][0]['Segmentation'][0])
            else:
                mask_pil = Image.open(mask_path)
            # mask_pil = hu.load_mat(mask_path)

            inst_path = self.dataset.masks[index].replace(
                'SegmentationClass', 'SegmentationObject')
            if '.mat' in inst_path:
                inst_pil = None
            else:
                inst_pil = Image.open(inst_path)

        elif self.supervision == 'seam':
            path_base = os.path.join(self.datadir, 'seam')
            os.makedirs(path_base, exist_ok=True)
            mask_path = os.path.join(path_base, 'masks', '%s_dict.pkl' % name)
            if not os.path.exists(mask_path):
                ut.generate_seam_segmentation(self, path_base=path_base)

            # mask_path = self.dataset.masks[index]
            mask_dict = hu.load_pkl(mask_path)

            if self.exp_dict.get('split_inst', False):
                blob_list, color_mask, inst_mask = get_blob_list_v2(
                    mask_dict, points_mask, img_pil)
            else:
                stop
            # if points_mask.sum() > 0:
            #     assert inst_mask.sum() > 1
            #     assert (inst_mask!=0).sum() == (color_mask!=0).sum()
            # hu.save_image('tmp.jpg', hi.mask_on_image(img_pil, inst_mask, add_bbox=True))

            mask_pil = Image.fromarray(color_mask)
            inst_pil = Image.fromarray(inst_mask)

        elif self.supervision == 'top_rpn':
            pm = proposals.ProposalManager(region_mode='rpn', n_regions=100)
            bbox_yxyx = pm.get_top_bbox_yxyx(img_pil, points_mask=points)

            mask = np.zeros((H, W), dtype='uint8')
            inst = np.zeros((H, W), dtype='uint8')

            for i, b in enumerate(bbox_yxyx):
                y1, x1, y2, x2 = map(int, b)
                assert (y2 <= H and x2 <= W)
                mask[y1:y2, x1:x2] = points[i]['cls']
                inst[y1:y2, x1:x2] = i + 1

            mask_pil = Image.fromarray(mask)
            inst_pil = Image.fromarray(inst)

        elif self.supervision == 'points_sharpmask':
            region_list = datasets.get_sharpmask(name)
            mask_list, c_list = datasets.get_mask_list(img_pil, points,
                                                       region_list)
            mask = np.zeros((H, W), dtype='uint8')
            inst = np.zeros((H, W), dtype='uint8')

            for i, m in enumerate(mask_list):
                mask[m == 1] = c_list[i]
                inst[m == 1] = i + 1

            mask_pil = Image.fromarray(mask)
            inst_pil = Image.fromarray(inst)

        elif self.supervision == 'points_irn':
            region_list = datasets.get_irn_regions(name)

            mask_list, c_list = datasets.get_mask_list(img_pil, points,
                                                       region_list)
            mask = np.zeros((H, W), dtype='uint8')
            inst = np.zeros((H, W), dtype='uint8')

            for i, m in enumerate(mask_list):
                mask[m == 1] = c_list[i]
                inst[m == 1] = i + 1

            mask_pil = Image.fromarray(mask)
            inst_pil = Image.fromarray(inst)

        elif self.supervision == 'points_slic':
            region_list = datasets.get_superpixels(img_pil, points)
            mask_list, c_list = datasets.get_mask_list(img_pil, points,
                                                       region_list)
            mask = np.zeros((H, W), dtype='uint8')
            inst = np.zeros((H, W), dtype='uint8')
            for i, m in enumerate(mask_list):
                mask[m == 1] = c_list[i]
                inst[m == 1] = i + 1

            mask_pil = Image.fromarray(mask)
            inst_pil = Image.fromarray(inst)

        else:
            raise ValueError('%s not found' % self.supervision)
        images = torchvision.transforms.ToTensor()(np.array(img_pil))
        mean = [0.485, 0.456, 0.406]
        std = [0.229, 0.224, 0.225]

        images = transforms.Normalize(mean=mean, std=std)(images)

        masks = torch.as_tensor(np.array(mask_pil))
        y_list, x_list = np.where(masks == 0)
        if len(y_list) > 0:
            yi, xi = datasets.get_random(y_list, x_list)
            points_list += [{'cls': 0, 'x': xi, 'y': yi}]
        batch = {
            "images": images,
            "img_pil": img_pil,
            'points': torch.as_tensor(points_mask),
            'point_list': points_list,
            # 'inst':inst,
            #  'flipped':flipped,
            "masks": masks,
            #  "original":inv_transform(images),
            "meta": {
                "index": index,
                'hash': hu.hash_dict({
                    'id': index,
                    'split': self.split
                }),
                "name": self.dataset.images[index],
                "size": images.shape[-2:],
                "image_id": index,
                "split": self.split
            }
        }

        return batch
    def __init__(self, split, datadir, exp_dict, mode='counting'):

        self.exp_dict = exp_dict

        if self.exp_dict['dataset']['mode'] == 'crowded':
            self.path = os.path.join(
                datadir, 'counting-crowded_n=100000_2020-Oct-19.h5py')

        elif self.exp_dict['dataset']['mode'] == 'fixed_scale':
            self.path = os.path.join(
                datadir, 'counting-fix-scale_n=100000_2020-Oct-19.h5py')

        elif self.exp_dict['dataset']['mode'] in ['no_overlap', 'overlap']:
            self.path = os.path.join(datadir,
                                     'counting_n=100000_2020-Oct-19.h5py')

        else:
            stop

        path_id = hu.hash_str(self.path)
        train_meta_fname = os.path.join(datadir,
                                        'train_meta_list_v1_%s.pkl' % path_id)
        val_meta_fname = os.path.join(datadir,
                                      'val_meta_list_v1_%s.pkl' % path_id)

        if not os.path.exists(train_meta_fname):
            meta = load_attributes_h5(self.path)
            meta_list, splits = meta
            for i, m in enumerate(meta_list):
                meta_list[i] = json.loads(m)
            for i, m in enumerate(meta_list):
                meta_list[i]['index'] = i
            meta_list = np.array(meta_list)
            train_split = splits['stratified_char'][:, 0]
            val_split = splits['stratified_char'][:, 1]
            # test_split = splits['stratified_char'][:, 2]

            train_meta_list = meta_list[train_split][:10000]
            val_meta_list = meta_list[val_split][:10000]

            hu.save_pkl(train_meta_fname, train_meta_list)
            hu.save_pkl(val_meta_fname, val_meta_list)
        # self.transform = None
        # load_minibatch_h5(self.path, [indices])
        # self.img_list = glob.glob(self.path+"/*.jpeg")
        self.split = split
        if split == 'train':
            self.meta_list = np.array(hu.load_pkl(train_meta_fname))
            n = int(0.9 * len(self.meta_list))
            self.meta_list = self.meta_list[:n]

        elif split == 'val':
            self.meta_list = np.array(hu.load_pkl(train_meta_fname))
            n = int(0.9 * len(self.meta_list))
            self.meta_list = self.meta_list[n:]

        elif split == 'test':
            self.meta_list = np.array(hu.load_pkl(val_meta_fname))

        if self.exp_dict['dataset']['mode'] == 'no_overlap':
            self.meta_list = [
                m for m in self.meta_list if m['overlap_score'] == 0
            ]

        elif self.exp_dict['dataset']['mode'] == 'overlap':
            self.meta_list = [
                m for m in self.meta_list if m['overlap_score'] > 0
            ]

        elif self.exp_dict['dataset']['mode'] in ['crowded', 'fixed_scale']:
            self.meta_list = self.meta_list
        else:
            stop

        symbol_dict = {}
        for i in range(len(self.meta_list)):
            meta = self.meta_list[i]
            for s in meta['symbols']:
                if s['char'] not in symbol_dict:
                    symbol_dict[s['char']] = []
                symbol_dict[s['char']] += [i]

        self.n_classes = 2
        self.symbol_dict = symbol_dict
        self.img_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ])
Example #14
def trainval(exp_dict,
             savedir_base,
             data_root,
             reset=False,
             wandb='None',
             wandb_key='None'):
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print(exp_dict)
    print("Experiment saved in %s" % savedir)

    model_name = exp_dict['model'] + \
                "_lr_" + str(exp_dict['lr']) +\
                "_hs_" + str(exp_dict['backbone']['hidden_size']) +\
                "_pa_" + str(exp_dict['patience'])

    if exp_dict['model'] == 'MAML':
        model_name += "_ilr_" + str(exp_dict['inner_lr']) +\
                      "_nii_" + str(exp_dict['n_inner_iter'])

    #TODO add seed

    if wandb != 'None':
        # https://docs.wandb.com/quickstart
        import wandb as logger
        if wandb_key != 'None':
            logger.login(key=wandb_key)
        logger.init(project=wandb, group=model_name)
        logger.config.update(exp_dict)

    # Dataset
    # -----------
    train_dataset = get_dataset('train', data_root, exp_dict)
    val_dataset = get_dataset('val', data_root, exp_dict)
    test_dataset = get_dataset('test', data_root, exp_dict)
    if 'ood' in exp_dict['dataset']['task']:
        ood_dataset = get_dataset('ood', data_root, exp_dict)
        ood = True
    else:
        ood = False

    # train and val loader
    if exp_dict["episodic"] == False:
        train_loader = DataLoader(train_dataset,
                                  batch_size=exp_dict['batch_size'],
                                  shuffle=True,
                                  num_workers=args.num_workers)
        val_loader = DataLoader(val_dataset,
                                batch_size=exp_dict['batch_size'],
                                shuffle=True,
                                num_workers=args.num_workers)
        test_loader = DataLoader(test_dataset,
                                 batch_size=exp_dict['batch_size'],
                                 shuffle=True,
                                 num_workers=args.num_workers)
    else:  # to support episodes TODO: move inside each model
        from datasets.episodic_dataset import EpisodicDataLoader
        train_loader = EpisodicDataLoader(train_dataset,
                                          batch_size=exp_dict['batch_size'],
                                          shuffle=True,
                                          collate_fn=lambda x: x,
                                          num_workers=args.num_workers)
        val_loader = EpisodicDataLoader(val_dataset,
                                        batch_size=exp_dict['batch_size'],
                                        shuffle=True,
                                        collate_fn=lambda x: x,
                                        num_workers=args.num_workers)
        test_loader = EpisodicDataLoader(test_dataset,
                                         batch_size=exp_dict['batch_size'],
                                         shuffle=True,
                                         collate_fn=lambda x: x,
                                         num_workers=args.num_workers)
        if ood:
            ood_loader = EpisodicDataLoader(ood_dataset,
                                            batch_size=exp_dict['batch_size'],
                                            shuffle=True,
                                            collate_fn=lambda x: x,
                                            num_workers=args.num_workers)

    # Model
    # -----------
    model = get_model(exp_dict)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    patience_counter = 0

    # Train & Val
    # ------------
    print("Starting experiment at epoch %d" % (s_epoch))

    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Train the model
        score_dict.update(model.train_on_loader(train_loader))

        # Validate and Test the model
        score_dict.update(
            model.val_on_loader(val_loader,
                                mode='val',
                                savedir=os.path.join(
                                    savedir_base,
                                    exp_dict['dataset']['name'])))
        score_dict.update(model.val_on_loader(test_loader, mode='test'))
        if ood:
            score_dict.update(model.val_on_loader(ood_loader, mode='ood'))

        score_dict["epoch"] = e

        # Visualize the model
        # model.vis_on_loader(vis_loader, savedir=savedir+"/images/")

        # Test error at best validation:
        if score_dict["val_accuracy"] > model.best_val:

            score_dict["test_accuracy_at_best_val"] = score_dict[
                "test_accuracy"]
            score_dict["ood_accuracy_at_best_val"] = score_dict["ood_accuracy"]
            model.best_val = score_dict["val_accuracy"]
            patience_counter = 0

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)
        if wandb != 'None':
            for key, values in score_dict.items():
                logger.log({key: values})

        patience_counter += 1

        # Patience:
        if patience_counter > exp_dict['patience'] * 3:
            print('training done, out of patience')
            break

    print('experiment completed')
Example #15
0
def trainval(exp_dict, savedir_base, datadir, reset=False, num_workers=0):
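    """Train on the 'train' split and validate on the 'val' split defined in exp_dict,
    checkpointing model.pth, score_list.pkl, and the best checkpoint under
    savedir_base/<hash of exp_dict>."""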
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        hc.delete_and_backup_experiment(savedir)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # train set
    train_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                     split="train",
                                     datadir=datadir,
                                     exp_dict=exp_dict,
                                     dataset_size=exp_dict['dataset_size'])
    # val set
    val_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                   split="val",
                                   datadir=datadir,
                                   exp_dict=exp_dict,
                                   dataset_size=exp_dict['dataset_size'])

    val_sampler = torch.utils.data.SequentialSampler(val_set)
    val_loader = DataLoader(val_set,
                            sampler=val_sampler,
                            batch_size=1,
                            num_workers=num_workers)
    # Model
    # ==================
    model = models.get_model(model_dict=exp_dict['model'],
                             exp_dict=exp_dict,
                             train_set=train_set).cuda()

    # model.opt = optimizers.get_optim(exp_dict['opt'], model)
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ==================
    print("Starting experiment at epoch %d" % (s_epoch))

    train_sampler = torch.utils.data.RandomSampler(train_set,
                                                   replacement=True,
                                                   num_samples=2 *
                                                   len(val_set))

    train_loader = DataLoader(train_set,
                              sampler=train_sampler,
                              batch_size=exp_dict["batch_size"],
                              drop_last=True,
                              num_workers=num_workers)

    for e in range(s_epoch, exp_dict['max_epoch']):
        # Validate only at the start of each cycle
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate and Visualize the model
        val_dict = model.val_on_loader(val_loader,
                                       savedir_images=os.path.join(
                                           savedir, "images"),
                                       n_images=3)
        score_dict.update(val_dict)
        # model.vis_on_loader(
        #     vis_loader, savedir=os.path.join(savedir, "images"))

        # Get new score_dict
        score_dict.update(train_dict)
        score_dict["epoch"] = len(score_list)

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail(), "\n")
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

        # Save Best Checkpoint
        if e == 0 or (score_dict.get("val_score", 0) >
                      score_df["val_score"][:-1].fillna(0).max()):
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "model_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

    print('Experiment completed at epoch %d' % e)
Example #16
0
    # lcfcn loss with_affinity=True
    # hash_id = '84ced18cf5c1fb3ad5820cc1b55a38fa'

    # point level
    # hash_id = 'd7040c9534b08e765f48c6cb034b26b2'

    # LCFCN
    # hash_id = 'bcba046296675e9e3af5cd9f353d217b'
    for hash_id in hash_list:
        exp_dict = hu.load_json(
            os.path.join(savedir_base, hash_id, 'exp_dict.json'))
        fname = '.tmp/train_dict_%s.pkl' % hash_id
        datadir = '/mnt/public/datasets/DeepFish/'
        if os.path.exists(fname) and 0:  # 'and 0' makes this condition always False, so the dataset is rebuilt below
            train_dict = hu.load_pkl(fname)
        else:
            split = 'train'
            exp_dict['model']['count_mode'] = 0
            train_set = datasets.get_dataset(
                dataset_dict=exp_dict["dataset"],
                split=split,
                datadir=datadir,
                exp_dict=exp_dict,
                dataset_size=exp_dict['dataset_size'])
            train_loader = DataLoader(
                train_set,
                # sampler=val_sampler,
                batch_size=1,
                collate_fn=ut.collate_fn,
                num_workers=0)
Example #17
0
def train(exp_dict, savedir_base, reset, compute_fid=False):
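    """Train the model described by exp_dict, optionally computing FID periodically
    when compute_fid is set, and checkpoint the model state, meta_dict (episode/epoch),
    and score_list under savedir_base/<hash of exp_dict>."""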
    # Book keeping
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)
    if reset:
        ut.rmtree(savedir)
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    print('Experiment saved in %s' % savedir)

    device = \
        torch.device('cuda:' + exp_dict['gpu'] if torch.cuda.is_available() else 'cpu')

    # 1. Load dataset and loader
    train_set, test_set, num_channels, num_train_classes, num_test_classes = \
        datasets.get_dataset(exp_dict['dataset'],
                             dataset_path=savedir_base,
                             image_size=exp_dict['image_size'])
    train_loader, test_loader = \
            dataloaders.get_dataloader(exp_dict['dataloader'],
                                       train_set, test_set, exp_dict)

    # 2. Fetch model to train
    model = models.get_model(exp_dict['model'], num_train_classes,
                             num_test_classes, num_channels, device, exp_dict)

    # 3. Resume experiment or start from scratch
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    if os.path.exists(score_list_path):
        # Resume experiment if it exists
        model_path = os.path.join(savedir, 'model_state_dict.pth')
        model.load_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        meta_dict_path = os.path.join(savedir, 'meta_dict.pkl')
        meta_dict = hu.load_pkl(meta_dict_path)
        print('Resuming experiment at episode %d epoch %d' %
              (meta_dict['episode'], meta_dict['epoch']))
    else:
        # Start experiment from scratch
        meta_dict = {'episode': 1, 'epoch': 1}
        score_list = []

        # Remove TensorBoard logs from previous runs
        ut.rmtree(os.path.join(savedir, 'tensorboard_logs'))

        print('Starting experiment at episode %d epoch %d' %
              (meta_dict['episode'], meta_dict['epoch']))

    # 4. Train and eval loop
    s_epoch = meta_dict['epoch']
    for e in range(s_epoch, exp_dict['num_epochs'] + 1):
        # 0. Initialize dicts
        score_dict = {'epoch': e}
        meta_dict['epoch'] = e

        # 1. Train on loader
        train_dict = model.train_on_loader(train_loader)

        # 1b. Compute FID
        if compute_fid:
            if e % 20 == 0 or e == 1 or e == exp_dict['num_epochs']:
                print('Starting FID computation...')
                train_dict['fid'] = fid(model, train_loader.dataset,
                                        train_loader.sampler, savedir)

        score_dict.update(train_dict)

        # 2. Eval on loader
        eval_dict = model.val_on_loader(test_loader, savedir, e)
        score_dict.update(eval_dict)

        # 3. Report and save model state, optimizer state, and scores
        score_list += [score_dict]
        score_df = pd.DataFrame(score_list)
        print('\n', score_df.tail(), '\n')
        if e % 10 == 0:
            hu.torch_save(os.path.join(savedir, 'model_state_dict.pth'),
                          model.get_state_dict())
        hu.save_pkl(os.path.join(savedir, 'score_list.pkl'), score_list)
        hu.save_pkl(os.path.join(savedir, 'meta_dict.pkl'), meta_dict)
Example #18
0
def trainval(exp_dict, savedir, args):
    """
    exp_dict: dictionary defining the hyperparameters of the experiment
    savedir: the directory where the experiment will be saved
    args: arguments passed through the command line
    """

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)

    # helen commented out the following lines and hard-coded the device to 'cpu' to resolve errors
    #if args.use_cuda:
    #device = 'cuda'
    #torch.cuda.manual_seed_all(seed)
    #assert torch.cuda.is_available(), 'cuda is not, available please run with "-c 0"'
    #else:
    device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # Load val set and train set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   transform=exp_dict.get("transform"),
                                   datadir=args.datadir)
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     transform=exp_dict.get("transform"),
                                     datadir=args.datadir)

    # Load train loader, val loader, and vis loader
    train_loader = DataLoader(train_set,
                              sampler=RandomSampler(
                                  train_set,
                                  replacement=True,
                                  num_samples=max(min(500, len(train_set)),
                                                  len(val_set))),
                              batch_size=exp_dict["batch_size"])

    val_loader = DataLoader(val_set,
                            shuffle=False,
                            batch_size=exp_dict["batch_size"])
    vis_loader = DataLoader(val_set,
                            sampler=ut.SubsetSampler(train_set,
                                                     indices=[0, 1, 2]),
                            batch_size=1)

    # Create model, opt, wrapper
    model_original = models.get_model(exp_dict["model"],
                                      exp_dict=exp_dict).cuda()
    opt = torch.optim.Adam(model_original.parameters(),
                           lr=1e-5,
                           weight_decay=0.0005)

    model = wrappers.get_wrapper(exp_dict["wrapper"],
                                 model=model_original,
                                 opt=opt).cuda()

    score_list = []

    # Checkpointing
    # =============
    #score_list_path = os.path.join(savedir, "score_list.pkl")      #helen commented out these three lines and hard coded the model and opt paths to resolve errors
    #model_path = os.path.join(savedir, "model_state_dict.pth")
    #opt_path = os.path.join(savedir, "opt_state_dict.pth")
    score_list_path = '/Users/helenpropson/Documents/git/marepesca/results/testresults/score_list.pkl'  #helen added this
    model_path = '/Users/helenpropson/Documents/git/marepesca/results/testresults/model_state_dict.pth'  #helen added this
    opt_path = '/Users/helenpropson/Documents/git/marepesca/results/testresults/opt_state_dict.pth'  #helen added this

    #helen hard coded that the experiment would resume instead of restarting from epoch 0
    #if os.path.exists(score_list_path):
    # resume experiment
    score_list = hu.load_pkl(
        score_list_path
    )  #helen changed this from ut.load_pkl to hu.load_pkl to resolve error
    model.load_state_dict(torch.load(model_path))
    opt.load_state_dict(torch.load(opt_path))
    s_epoch = score_list[-1]["epoch"] + 1

    #else:
    # restart experiment
    #score_list = []
    #s_epoch = 0

    # ***************            helen added this code
    im = Image.open("/Users/helenpropson/Documents/git/marepesca/tank.jpg")
    # im.show()  #this line will display the image you are running the model on if uncommented

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize_transform = transforms.Normalize(mean=mean, std=std)

    data_transform = transforms.Compose(
        [transforms.ToTensor(),
         normalize_transform])  #transformations we will use on our image
    im_new = data_transform(
        im)  #transforms the image into a tensor and normalizes it
    im_final = im_new.unsqueeze(
        0)  #adds another dimension so image is the correct shape for the model
    print("now trying helen's code")  #print statement for debugging
    #model.vis_on_batch_helen(im_final, f'im_new')    #uncomment this line to run model on image

    # ***************            this is the end of helen's code

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}

        # visualize
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))

        print("after vis_on_loader"
              )  #helen add this print statement as an update while iterating

        # validate
        score_dict.update(model.val_on_loader(val_loader))

        print("after validate")

        # train
        score_dict.update(model.train_on_loader(train_loader))

        print("after train")

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved in %s" % savedir)
Example #19
0
def test(exp_dict,
         savedir_base,
         datadir,
         num_workers=0,
         model_path=None,
         scan_id=None):
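    """Evaluate a trained model on the 'val' split (optionally a single scan_id),
    loading weights from model_path if given, otherwise from the matching training
    experiment's model_best.pth, and append the scores to score_list.pkl."""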
    # bookkeeping stuff
    # ==================
    pprint.pprint(exp_dict)
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # ==================
    # val set
    test_set = datasets.get_dataset(dataset_dict=exp_dict["dataset"],
                                    split="val",
                                    datadir=datadir,
                                    exp_dict=exp_dict,
                                    dataset_size=exp_dict['dataset_size'])
    if str(scan_id) != 'None':
        test_set.active_data = test_set.get_scan(scan_id)
    test_sampler = torch.utils.data.SequentialSampler(test_set)
    test_loader = DataLoader(test_set,
                             sampler=test_sampler,
                             batch_size=1,
                             collate_fn=ut.collate_fn,
                             num_workers=num_workers)

    # Model
    # ==================
    # chk = torch.load('best_model.ckpt')
    model = models.get_model_for_onnx_export(model_dict=exp_dict['model'],
                                             exp_dict=exp_dict,
                                             train_set=test_set).cuda()
    epoch = -1

    if str(model_path) != 'None':
        model_path = model_path
        model.load_state_dict(hu.torch_load(model_path))
    else:
        try:
            exp_dict_train = copy.deepcopy(exp_dict)
            del exp_dict_train['test_mode']
            savedir_train = os.path.join(savedir_base,
                                         hu.hash_dict(exp_dict_train))
            model_path = os.path.join(savedir_train, "model_best.pth")
            score_list = hu.load_pkl(
                os.path.join(savedir_train, 'score_list_best.pkl'))
            epoch = score_list[-1]['epoch']
            print('Loaded model at epoch %d with score %.3f' %
                  (epoch, score_list[-1].get('val_score', -1)))
            model.load_state_dict(hu.torch_load(model_path))
        except Exception:
            pass

    s_time = time.time()
    savedir_images = os.path.join(savedir, 'images')

    # delete image folder if exists
    if os.path.exists(savedir_images):
        shutil.rmtree(savedir_images)

    os.makedirs(savedir_images, exist_ok=True)
    # for i in range(20):
    #     score_dict = model.train_on_loader(test_loader)
    score_dict = model.val_on_loader(test_loader,
                                     savedir_images=savedir_images,
                                     n_images=30000,
                                     save_preds=True)

    score_dict['epoch'] = epoch
    score_dict["time"] = time.time() - s_time
    score_dict["saved_at"] = hu.time_to_montreal()
    # save test_score_list
    test_path = os.path.join(savedir, "score_list.pkl")
    if os.path.exists(test_path):
        test_score_list = [
            sd for sd in hu.load_pkl(test_path) if sd['epoch'] != epoch
        ]
    else:
        test_score_list = []

    # append score_dict to last result
    test_score_list += [score_dict]
    hu.save_pkl(test_path, test_score_list)
    print('Final Score is ', str(score_dict["val_score"]) + "\n")
Example #20
0
    def __init__(self, model, n_classes, exp_dict, pretrained_savedir=None, savedir_base=None):
        """ Constructor
        Args:
            model: architecture to train
            exp_dict: reference to dictionary with the global state of the application
        """
        super().__init__()
        self.model = model
        self.exp_dict = exp_dict 
        self.ngpu = self.exp_dict["ngpu"]
        self.predict_method = exp_dict['predict_method']

        self.model.add_classifier(n_classes, modalities=0)
        self.nclasses = n_classes

        best_accuracy = -1 
        self.label = exp_dict['model']['backbone'] + "_" + exp_dict['dataset_test'].split('_')[1].replace('-imagenet','')
        print('=============')
        print('dataset:', exp_dict["dataset_train"].split('_')[-1]) 
        print('backbone:', exp_dict['model']["backbone"])
        print('n_classes:', exp_dict['n_classes'])
        print('support_size_train:', exp_dict['support_size_train'])

        if pretrained_savedir is None:
            # find the best checkpoint
            savedir_base = exp_dict["finetuned_weights_root"]
            if not os.path.exists(savedir_base):
                raise ValueError("Please set the variable named \
                    'finetuned_weights_root' with the path of the folder \
                    with the episodic finetuning experiments")
            for exp_hash in os.listdir(savedir_base):
                base_path = os.path.join(savedir_base, exp_hash)
                exp_dict_path = os.path.join(base_path, 'exp_dict.json')
                if not os.path.exists(exp_dict_path):
                    continue
                loaded_exp_dict = hu.load_json(exp_dict_path)
                pkl_path = os.path.join(base_path, 'score_list_best.pkl')

                if exp_dict['support_size_train'] in [2,3,4]:
                    support_size_needed = 1
                else:
                    support_size_needed = exp_dict['support_size_train']

                if (loaded_exp_dict["model"]["name"] == 'finetuning' and 
                    loaded_exp_dict["dataset_train"].split('_')[-1] == exp_dict["dataset_train"].split('_')[-1] and 
                    loaded_exp_dict["model"]["backbone"] == exp_dict['model']["backbone"] and
                    loaded_exp_dict['n_classes'] == exp_dict["n_classes"] and
                    loaded_exp_dict['support_size_train'] == support_size_needed,
                    loaded_exp_dict["embedding_prop"] == exp_dict["embedding_prop"]):
                    
                    model_path = os.path.join(base_path, 'checkpoint_best.pth')

                    try:
                        print("Attempting to load ", model_path)
                        accuracy = hu.load_pkl(pkl_path)[-1]["val_accuracy"]
                        self.model.load_state_dict(torch.load(model_path)['model'], strict=False)
                        if accuracy > best_accuracy:
                            best_path = os.path.join(base_path, 'checkpoint_best.pth')
                            best_accuracy = accuracy
                    except Exception as e:
                        print(e)
                   
            assert(best_accuracy > 0.1)
            print("Finetuning %s with original accuracy : %f" %(base_path, best_accuracy))
            self.model.load_state_dict(torch.load(best_path)['model'], strict=False)
        self.best_accuracy = best_accuracy
        self.acc_sum = 0.0
        self.n_count = 0
        self.model.cuda()
Example #21
0
        if points.sum() == 0:
            continue
        savedir_image = os.path.join('.tmp/qualitative/%d.png' % (i))
        img = hu.denormalize(batch['images'], mode='rgb')
        img_org = np.array(
            hu.save_image(savedir_image,
                          img,
                          mask=batch['masks'].numpy(),
                          return_image=True))

        img_list = [img_org]
        with torch.no_grad():
            for hash_id in hash_list:
                score_path = os.path.join(savedir_base, hash_id,
                                          'score_list_best.pkl')
                score_list = hu.load_pkl(score_path)

                exp_dict = hu.load_json(
                    os.path.join(savedir_base, hash_id, 'exp_dict.json'))
                print(i, exp_dict['model']['loss'],
                      exp_dict['model'].get('with_affinity'), 'score:',
                      score_list[-1]['test_class1'])

                model = models.get_model(model_dict=exp_dict['model'],
                                         exp_dict=exp_dict,
                                         train_set=test_set).cuda()

                model_path = os.path.join(savedir_base, hash_id,
                                          'model_best.pth')
                model.load_state_dict(hu.torch_load(model_path),
                                      with_opt=False)
Example #22
0
def trainval(exp_dict, savedir_base, reset=False):
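    """Minimal train/val loop: build the dataset, model, and optimizer from exp_dict,
    resume from score_list.pkl if it exists, and checkpoint after every epoch."""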
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ---------------
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # -----------

    # train loader
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=savedir_base,
                                     exp_dict=exp_dict)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        drop_last=True,
        shuffle=True,
        batch_size=exp_dict["batch_size"])

    # val set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=savedir_base,
                                   exp_dict=exp_dict)

    # Model
    # -----------
    model = models.get_model(exp_dict["model"], train_set=train_set).cuda()
    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Compute fstar
    # -------------
    if exp_dict['opt'].get('fstar_flag'):
        ut.compute_fstar(train_set, loss_function, savedir_base, exp_dict)

    # Load Optimizer
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt_dict=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d/%d' %
          (s_epoch, exp_dict['max_epoch']))

    for e in range(s_epoch, exp_dict['max_epoch']):
        # Set seed
        seed = e + exp_dict['runs']
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

        score_dict = {}

        # Compute train loss over train set
        score_dict["train_loss"] = metrics.compute_metric_on_dataset(
            model, train_set, metric_name=exp_dict["loss_func"])

        # Compute val acc over val set
        score_dict["val_acc"] = metrics.compute_metric_on_dataset(
            model, val_set, metric_name=exp_dict["acc_func"])

        # Train over train loader
        model.train()
        print("%d - Training model with %s..." % (e, exp_dict["loss_func"]))

        # train and validate
        s_time = time.time()
        for batch in tqdm.tqdm(train_loader):
            images, labels = batch["images"].cuda(), batch["labels"].cuda()

            opt.zero_grad()

            # closure
            def closure():
                return loss_function(model, images, labels, backwards=True)

            opt.step(closure)

        e_time = time.time()

        # Record metrics
        score_dict["epoch"] = e
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["step_size_avg"] = opt.state["step_size_avg"]
        score_dict["n_forwards"] = opt.state["n_forwards"]
        score_dict["n_backwards"] = opt.state["n_backwards"]
        score_dict["grad_norm"] = opt.state["grad_norm"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

    print('Experiment completed')
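
# A minimal driver sketch (hypothetical exp_configs list and savedir_base path) showing how a
# trainval function like the one above is typically invoked: one call per experiment
# dictionary, each checkpointed under its own hashed folder.
if __name__ == '__main__':
    for exp_dict in exp_configs:  # exp_configs: a list of hyperparameter dictionaries
        trainval(exp_dict, savedir_base='/tmp/experiments', reset=False)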
Example #23
0
def trainval(exp_dict, savedir_base, reset=False):
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # Dataset
    # -----------

    # train loader
    train_loader = datasets.get_loader(dataset_name=exp_dict['dataset'],
                                       datadir=savedir_base,
                                       split='train')

    # val loader
    val_loader = datasets.get_loader(dataset_name=exp_dict['dataset'],
                                     datadir=savedir_base,
                                     split='val')

    # Model
    # -----------
    model = models.get_model(model_name=exp_dict['model'])

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d' % (s_epoch))

    for e in range(s_epoch, 10):
        score_dict = {}

        # Train the model
        train_dict = model.train_on_loader(train_loader)

        # Validate the model
        val_dict = model.val_on_loader(val_loader)

        # Get metrics
        score_dict['train_loss'] = train_dict['train_loss']
        score_dict['val_acc'] = val_dict['val_acc']
        score_dict['epoch'] = e

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print(score_df.tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print('Checkpoint Saved: %s' % savedir)

    print('experiment completed')
Example #24
def trainval(exp_dict, savedir_base, data_root, reset=False, test_only=False):
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    np.random.seed(exp_dict["seed"])
    torch.manual_seed(exp_dict["seed"])

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    pprint.pprint(exp_dict)
    print("Experiment saved in %s" % savedir)

    # Dataset
    # -----------
    # train and val loader
    if exp_dict["episodic"] == False:
        if (int(test_only) == 0):
            train_dataset, val_dataset, test_dataset = get_dataset(
                ['train', 'val', 'test'], data_root, exp_dict)
            train_loader = DataLoader(train_dataset,
                                      batch_size=exp_dict['batch_size'],
                                      shuffle=True,
                                      num_workers=args.num_workers)
            val_loader = DataLoader(val_dataset,
                                    batch_size=exp_dict['batch_size'],
                                    shuffle=True,
                                    num_workers=args.num_workers)
            test_loader = DataLoader(test_dataset,
                                     batch_size=exp_dict['batch_size'],
                                     shuffle=True,
                                     num_workers=args.num_workers)
            if hasattr(train_dataset, "mask"):
                # assert((train_dataset.mask == val_dataset.mask).all())
                # assert((train_dataset.mask == test_dataset.mask).all())
                np.save(os.path.join(savedir, "mask.npy"), train_dataset.mask)
        else:
            test_dataset, = get_dataset(['test'], data_root, exp_dict)
            test_loader = DataLoader(test_dataset,
                                     batch_size=exp_dict['batch_size'],
                                     shuffle=True,
                                     num_workers=args.num_workers)
    else:  # to support episodes TODO: move inside each model
        from datasets.episodic_dataset import EpisodicDataLoader
        train_loader = EpisodicDataLoader(train_dataset,
                                          batch_size=exp_dict['batch_size'],
                                          shuffle=True,
                                          collate_fn=lambda x: x,
                                          num_workers=args.num_workers)
        val_loader = EpisodicDataLoader(val_dataset,
                                        batch_size=exp_dict['batch_size'],
                                        shuffle=True,
                                        collate_fn=lambda x: x,
                                        num_workers=args.num_workers)

    # Model
    # -----------
    model = get_model(exp_dict)
    print("Parameters: ", sum([torch.numel(v) for v in model.parameters()]))

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        print("Resuming from", model_path)
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    if int(test_only) == 0:
        # Train & Val
        # ------------
        print("Starting experiment at epoch %d" % (s_epoch))

        for e in range(s_epoch, exp_dict['max_epoch']):
            score_dict = {}

            # Train the model
            score_dict.update(model.train_on_loader(train_loader))

            # Validate the model
            score_dict.update(
                model.val_on_loader(val_loader,
                                    savedir=os.path.join(
                                        savedir_base,
                                        exp_dict['dataset']['name'])))
            score_dict["epoch"] = e

            # Visualize the model
            # model.vis_on_loader(vis_loader, savedir=savedir+"/images/")

            # Add to score_list and save checkpoint
            score_list += [score_dict]

            # Report & Save
            score_df = pd.DataFrame(score_list)
            print("\n", score_df.tail())
            hu.torch_save(model_path, model.get_state_dict())
            hu.save_pkl(score_list_path, score_list)
            print("Checkpoint Saved: %s" % savedir)

            if model.is_end():
                print("Early stopping")
                break
        print('experiment completed')

        print("Testing...")
        score_dict = model.test_on_loader(train_loader, tag="train")
        score_dict.update(model.test_on_loader(val_loader, tag="val"))
        score_dict.update(model.test_on_loader(test_loader, tag="test"))
        # Report & Save
        score_list_path = os.path.join(savedir, "score_list_test.pkl")
        hu.save_pkl(score_list_path, score_dict)
    else:
        print("Testing...")
        score_dict = model.test_on_loader(test_loader, "test")
        # Report & Save
        score_list_path = os.path.join(savedir, "score_list_test.pkl")
        hu.save_pkl(score_list_path, score_dict)
Example #25
0
def trainval(exp_dict, savedir, args):
    """
    exp_dict: dictionary defining the hyperparameters of the experiment
    savedir: the directory where the experiment will be saved
    args: arguments passed through the command line
    """

    # set seed
    # ==================
    seed = 42
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.use_cuda:
        device = 'cuda'
        torch.cuda.manual_seed_all(seed)
        assert torch.cuda.is_available(), \
            'cuda is not available, please run with "-c 0"'
    else:
        device = 'cpu'

    print('Running on device: %s' % device)

    # Dataset
    # Load val set and train set
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   split="val",
                                   transform=exp_dict.get("transform"),
                                   datadir=args.datadir)
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     split="train",
                                     transform=exp_dict.get("transform"),
                                     datadir=args.datadir)

    # Load train loader, val loader, and vis loader
    train_loader = DataLoader(train_set,
                              sampler=RandomSampler(
                                  train_set,
                                  replacement=True,
                                  num_samples=max(min(500, len(train_set)),
                                                  len(val_set))),
                              batch_size=exp_dict["batch_size"])

    val_loader = DataLoader(val_set,
                            shuffle=False,
                            batch_size=exp_dict["batch_size"])
    vis_loader = DataLoader(val_set,
                            sampler=ut.SubsetSampler(train_set,
                                                     indices=[0, 1, 2]),
                            batch_size=1)

    # Create model, opt, wrapper
    model_original = models.get_model(exp_dict["model"],
                                      exp_dict=exp_dict).cuda()
    opt = torch.optim.Adam(model_original.parameters(),
                           lr=1e-5,
                           weight_decay=0.0005)

    model = wrappers.get_wrapper(exp_dict["wrapper"],
                                 model=model_original,
                                 opt=opt).cuda()

    score_list = []

    # Checkpointing
    # =============
    score_list_path = os.path.join(savedir, "score_list.pkl")
    model_path = os.path.join(savedir, "model_state_dict.pth")
    opt_path = os.path.join(savedir, "opt_state_dict.pth")

    if os.path.exists(score_list_path):
        # resume experiment
        score_list = hu.load_pkl(score_list_path)
        model.load_state_dict(torch.load(model_path))
        opt.load_state_dict(torch.load(opt_path))
        s_epoch = score_list[-1]["epoch"] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}

        # visualize
        model.vis_on_loader(vis_loader,
                            savedir=os.path.join(savedir, "images"))
        # validate
        score_dict.update(model.val_on_loader(val_loader))

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved in %s" % savedir)
Example #26
0
def trainval(exp_dict,
             savedir_base,
             reset=False,
             num_workers=0,
             run_ssl=False):
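    """Episodic few-shot training: build train/val/test episode loaders from exp_dict,
    optionally run only the SSL evaluation (run_ssl=True), track the best checkpoint
    according to exp_dict['target_loss'], and stop when model.is_end_of_training()."""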
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # load datasets
    # ==========================
    train_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_train"],
        data_root=exp_dict["dataset_train_root"],
        split="train",
        transform=exp_dict["transform_train"],
        classes=exp_dict["classes_train"],
        support_size=exp_dict["support_size_train"],
        query_size=exp_dict["query_size_train"],
        n_iters=exp_dict["train_iters"],
        unlabeled_size=exp_dict["unlabeled_size_train"])

    val_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_val"],
        data_root=exp_dict["dataset_val_root"],
        split="val",
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_val"],
        support_size=exp_dict["support_size_val"],
        query_size=exp_dict["query_size_val"],
        n_iters=exp_dict["val_iters"],
        unlabeled_size=exp_dict["unlabeled_size_val"])

    test_set = datasets.get_dataset(
        dataset_name=exp_dict["dataset_test"],
        data_root=exp_dict["dataset_test_root"],
        split="test",
        transform=exp_dict["transform_val"],
        classes=exp_dict["classes_test"],
        support_size=exp_dict["support_size_test"],
        query_size=exp_dict["query_size_test"],
        n_iters=exp_dict["test_iters"],
        unlabeled_size=exp_dict["unlabeled_size_test"])

    # get dataloaders
    # ==========================
    train_loader = torch.utils.data.DataLoader(
        train_set,
        batch_size=exp_dict["batch_size"],
        shuffle=True,
        num_workers=num_workers,
        collate_fn=ut.get_collate(exp_dict["collate_fn"]),
        drop_last=True)
    val_loader = torch.utils.data.DataLoader(val_set,
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=num_workers,
                                             collate_fn=lambda x: x,
                                             drop_last=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=num_workers,
                                              collate_fn=lambda x: x,
                                              drop_last=True)

    # create model and trainer
    # ==========================

    # Create model, opt, wrapper
    backbone = backbones.get_backbone(
        backbone_name=exp_dict['model']["backbone"], exp_dict=exp_dict)
    model = models.get_model(model_name=exp_dict["model"]['name'],
                             backbone=backbone,
                             n_classes=exp_dict["n_classes"],
                             exp_dict=exp_dict)

    if run_ssl:
        # runs the SSL experiments
        score_list_path = os.path.join(savedir, 'score_list.pkl')
        if not os.path.exists(score_list_path):
            test_dict = model.test_on_loader(test_loader, max_iter=None)
            hu.save_pkl(score_list_path, [test_dict])
        return

    # Checkpoint
    # -----------
    checkpoint_path = os.path.join(savedir, 'checkpoint.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')

    if os.path.exists(score_list_path):
        # resume experiment
        model.load_state_dict(hu.torch_load(checkpoint_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Run training and validation
    for epoch in range(s_epoch, exp_dict["max_epoch"]):
        score_dict = {"epoch": epoch}
        score_dict.update(model.get_lr())

        # train
        score_dict.update(model.train_on_loader(train_loader))

        # validate
        score_dict.update(model.val_on_loader(val_loader))
        score_dict.update(model.test_on_loader(test_loader))

        # Add score_dict to score_list
        score_list += [score_dict]

        # Report
        score_df = pd.DataFrame(score_list)
        print(score_df.tail())

        # Save checkpoint
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(checkpoint_path, model.get_state_dict())
        print("Saved: %s" % savedir)

        if "accuracy" in exp_dict["target_loss"]:
            is_best = score_dict[exp_dict["target_loss"]] >= score_df[
                exp_dict["target_loss"]][:-1].max()
        else:
            is_best = score_dict[exp_dict["target_loss"]] <= score_df[
                exp_dict["target_loss"]][:-1].min()

        # Save best checkpoint
        if is_best:
            hu.save_pkl(os.path.join(savedir, "score_list_best.pkl"),
                        score_list)
            hu.torch_save(os.path.join(savedir, "checkpoint_best.pth"),
                          model.get_state_dict())
            print("Saved Best: %s" % savedir)

        # Check for end of training conditions
        if model.is_end_of_training():
            break
Example #27
0
def newminimum(exp_id,
               savedir_base,
               datadir,
               name,
               exp_dict,
               metrics_flag=True):
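    """Resume training from a previously saved experiment (exp_id) under a new name,
    using a custom loss that also receives the stored parameters ('minimum'), and
    report the distance between the current model and that starting point."""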
    # bookkeeping
    # ---------------

    # get experiment directory
    old_modeldir = os.path.join(savedir_base, exp_id)
    savedir = os.path.join(savedir_base, exp_id, name)

    old_exp_dict = hu.load_json(os.path.join(old_modeldir, 'exp_dict.json'))

    # TODO: compare exp dict for possible errors:
    # optimizer have to be the same
    # same network, dataset

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, 'exp_dict.json'), exp_dict)
    pprint.pprint(exp_dict)
    print('Experiment saved in %s' % savedir)

    # set seed
    # ---------------
    seed = 42 + exp_dict['runs']
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)

    # Dataset
    # -----------

    # Load Train Dataset
    train_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                     train_flag=True,
                                     datadir=datadir,
                                     exp_dict=exp_dict)

    train_loader = torch.utils.data.DataLoader(
        train_set,
        drop_last=True,
        shuffle=True,
        batch_size=exp_dict["batch_size"])

    # Load Val Dataset
    val_set = datasets.get_dataset(dataset_name=exp_dict["dataset"],
                                   train_flag=False,
                                   datadir=datadir,
                                   exp_dict=exp_dict)

    # Model
    # -----------
    model = models.get_model(exp_dict["model"], train_set=train_set)

    # Choose loss and metric function
    loss_function = metrics.get_metric_function(exp_dict["loss_func"])

    # Load Optimizer
    n_batches_per_epoch = len(train_set) / float(exp_dict["batch_size"])
    opt = optimizers.get_optimizer(opt=exp_dict["opt"],
                                   params=model.parameters(),
                                   n_batches_per_epoch=n_batches_per_epoch)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, 'model.pth')
    score_list_path = os.path.join(savedir, 'score_list.pkl')
    opt_path = os.path.join(savedir, 'opt_state_dict.pth')

    old_model_path = os.path.join(old_modeldir, 'model.pth')
    old_score_list_path = os.path.join(old_modeldir, 'score_list.pkl')
    old_opt_path = os.path.join(old_modeldir, 'opt_state_dict.pth')

    score_list = hu.load_pkl(old_score_list_path)
    model.load_state_dict(torch.load(old_model_path))
    opt.load_state_dict(torch.load(old_opt_path))
    s_epoch = score_list[-1]['epoch'] + 1

    # save current model state for comparison
    minimum = []

    for param in model.parameters():
        minimum.append(param.clone())

    # Train & Val
    # ------------
    print('Starting experiment at epoch %d/%d' %
          (s_epoch, exp_dict['max_epoch']))

    for epoch in range(s_epoch, exp_dict['max_epoch']):
        # Set seed
        np.random.seed(exp_dict['runs'] + epoch)
        torch.manual_seed(exp_dict['runs'] + epoch)
        # torch.cuda.manual_seed_all(exp_dict['runs']+epoch) not needed since no cuda available

        score_dict = {"epoch": epoch}

        if metrics_flag:
            # 1. Compute train loss over train set
            score_dict["train_loss"] = metrics.compute_metric_on_dataset(
                model, train_set, metric_name='softmax_loss')
            #                                    metric_name=exp_dict["loss_func"])
            # TODO: which loss should be used? (normal or with reguralizer?)

            # 2. Compute val acc over val set
            score_dict["val_acc"] = metrics.compute_metric_on_dataset(
                model, val_set, metric_name=exp_dict["acc_func"])

        # 3. Train over train loader
        model.train()
        print("%d - Training model with %s..." %
              (epoch, exp_dict["loss_func"]))

        s_time = time.time()
        for images, labels in tqdm.tqdm(train_loader):
            # images, labels = images.cuda(), labels.cuda() no cuda available

            opt.zero_grad()
            loss = loss_function(model, images, labels, minimum,
                                 0.1)  # just works for custom loss function
            loss.backward()
            opt.step()

        e_time = time.time()

        # Record metrics
        score_dict["step_size"] = opt.state["step_size"]
        score_dict["n_forwards"] = opt.state["n_forwards"]
        score_dict["n_backwards"] = opt.state["n_backwards"]
        score_dict["batch_size"] = train_loader.batch_size
        score_dict["train_epoch_time"] = e_time - s_time

        score_list += [score_dict]

        # Report and save
        print(pd.DataFrame(score_list).tail())
        hu.save_pkl(score_list_path, score_list)
        hu.torch_save(model_path, model.state_dict())
        hu.torch_save(opt_path, opt.state_dict())
        print("Saved: %s" % savedir)

        with torch.no_grad():
            print('Current distance: %f' %
                  metrics.computedistance(minimum, model))

    print('Experiment completed')
Example #28
0
    def __init__(self, model, n_classes, exp_dict):
        """ Constructor
        Args:
            model: architecture to train
            exp_dict: reference to dictionary with the global state of the application
        """
        super().__init__()
        self.model = model
        self.exp_dict = exp_dict
        self.ngpu = self.exp_dict["ngpu"]
        self.predict_method = exp_dict['predict_method']

        self.model.add_classifier(n_classes, modalities=0)
        self.nclasses = n_classes

        if self.exp_dict["rotation_weight"] > 0:
            self.model.add_classifier(4, "classifier_rot")

        best_accuracy = -1
        self.label = exp_dict['model']['backbone'] + "_" + exp_dict[
            'dataset_test'].split('_')[1].replace('-imagenet', '')

        if self.exp_dict["pretrained_weights_root"] == 'tinder':
            best_scores = np.load(
                '/mnt/datasets/public/research/adaptron_laplace/best_scores.npy',
                allow_pickle=True)
            for r in best_scores:
                backbone_best = r[3]
                dataset_best = r[4]
                savedir_best = r[-1]
                best_accuracy = r[0]
                shot_best = r[2]
                if (exp_dict['model']['backbone'] == backbone_best
                        and exp_dict['dataset_test'] == dataset_best
                        and 5 == shot_best):
                    self.best_accuracy = best_accuracy
                    self.model.load_state_dict(
                        torch.load(
                            os.path.join(savedir_best,
                                         'checkpoint_best.pth'))['model'])

                    break

        elif self.exp_dict["pretrained_weights_root"] == 'csv':
            best_scores = np.load(
                '/mnt/datasets/public/research/adaptron_laplace/best_scores.npy',
                allow_pickle=True)
            for r in best_scores:
                backbone_best = r[3]
                dataset_best = r[4]
                savedir_best = r[-1]
                best_accuracy = r[0]
                shot_best = r[2]
                if (exp_dict['model']['backbone'] == backbone_best
                        and exp_dict['dataset_test'] == dataset_best
                        and exp_dict['support_size_test'] == shot_best):
                    self.best_accuracy = best_accuracy
                    self.model.load_state_dict(
                        torch.load(
                            os.path.join(savedir_best,
                                         'checkpoint_best.pth'))['model'])

                    break

        elif self.exp_dict["pretrained_weights_root"] == 'hdf5':
            fdir = '/mnt/datasets/public/research/adaptron_laplace/embeddings/finetuned'
            fpos = "%s_1shot_fine_*/test.h5" % (self.label)

            embeddings_fname = glob.glob(os.path.join(fdir, fpos))[0]
            self.best_accuracy = float(
                embeddings_fname.split('/')[-2].split('_')[-1]) / 100.
            self.sampler = oracle.Sampler(embeddings_fname=embeddings_fname,
                                          n_classes=exp_dict['classes_test'],
                                          distract_flag=exp_dict.get(
                                              'distract_flag', False))

        elif self.exp_dict["pretrained_weights_root"] is not None:
            for exp_hash in os.listdir(
                    self.exp_dict['pretrained_weights_root']):
                base_path = os.path.join(
                    self.exp_dict['pretrained_weights_root'], exp_hash)
                exp_dict_path = os.path.join(base_path, 'exp_dict.json')
                if not os.path.exists(exp_dict_path):
                    continue
                loaded_exp_dict = haven.load_json(exp_dict_path)
                pkl_path = os.path.join(base_path, 'score_list_best.pkl')
                if not os.path.exists(pkl_path):
                    continue
                if (loaded_exp_dict["model"]["name"] == 'finetuning'
                        and loaded_exp_dict["dataset_train"].split('_')[-1]
                        == exp_dict["dataset_train"].split('_')[-1]
                        and loaded_exp_dict["model"]["backbone"]
                        == exp_dict['model']["backbone"]
                        and loaded_exp_dict["labelprop_alpha"]
                        == exp_dict["labelprop_alpha"]
                        and loaded_exp_dict["labelprop_scale"]
                        == exp_dict["labelprop_scale"]
                        and loaded_exp_dict["support_size_train"]
                        == exp_dict["support_size_train"]):
                    accuracy = haven.load_pkl(pkl_path)[-1]["val_accuracy"]
                    try:
                        self.model.load_state_dict(torch.load(
                            os.path.join(base_path,
                                         'checkpoint_best.pth'))['model'],
                                                   strict=False)
                        if accuracy > best_accuracy:
                            best_path = os.path.join(base_path,
                                                     'checkpoint_best.pth')
                            best_accuracy = accuracy
                            best_score_list = haven.load_pkl(pkl_path)
                    except Exception as e:
                        print(str(e))
            assert best_accuracy > 0.1, 'no matching pretrained checkpoint was found'
            self.best_accuracy = best_score_list[-1]['test_accuracy']
            print("Finetuning %s with original accuracy : %f" %
                  (best_path, best_accuracy))
            self.model.load_state_dict(torch.load(best_path)['model'],
                                       strict=False)

        else:
            raise ValueError('pretrained_weights_root is not defined')
        self.acc_sum = 0.0
        self.n_count = 0
        self.model.cuda()
Example #29
0
    # experiment hash ids to compare, mapped to readable method names
    hash_list = {
        '6d4af38d64b23586e71a198de2608333': 'LCFCN',
        '84ced18cf5c1fb3ad5820cc1b55a38fa': 'LCFCN+Affinity_(ours)',
        '63f29eec3dbe1e03364f198ed7d4b414': 'Point-level_Loss',
        '017e7441c2f581b6fee9e3ac6f574edc': 'Cross_entropy_Loss+pseudo-mask'
    }
    datadir = '/mnt/public/datasets/DeepFish/'

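    # evaluate each hashed experiment, reusing cached habitat predictions when available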
    score_list = []
    for hash_id in hash_list:
        fname = os.path.join('/mnt/public/predictions/habitat',
                             '%s.pkl' % hash_id)
        exp_dict = hu.load_json(
            os.path.join(savedir_base, hash_id, 'exp_dict.json'))
        if os.path.exists(fname):
            print('FOUND:', fname)
            val_dict = hu.load_pkl(fname)
        else:
            train_set = datasets.get_dataset(
                dataset_dict=exp_dict["dataset"],
                split='train',
                datadir=datadir,
                exp_dict=exp_dict,
                dataset_size=exp_dict['dataset_size'])

            test_set = datasets.get_dataset(
                dataset_dict=exp_dict["dataset"],
                split='test',
                datadir=datadir,
                exp_dict=exp_dict,
                dataset_size=exp_dict['dataset_size'])
def trainval(exp_dict, savedir_base, reset=False):
    # bookkeeping
    # ---------------

    # get experiment directory
    exp_id = hu.hash_dict(exp_dict)
    savedir = os.path.join(savedir_base, exp_id)

    if reset:
        # delete and backup experiment
        hc.delete_experiment(savedir, backup_flag=True)

    # create folder and save the experiment dictionary
    os.makedirs(savedir, exist_ok=True)
    hu.save_json(os.path.join(savedir, "exp_dict.json"), exp_dict)
    print(exp_dict)
    print("Experiment saved in %s" % savedir)

    # Set Seed
    # -------
    seed = exp_dict.get('seed', 42)  # fall back to a fixed seed if the experiment does not set one
    np.random.seed(seed)
    torch.manual_seed(seed)

    # Dataset
    # -----------
    train_dataset = get_dataset('train', exp_dict['dataset'])
    val_dataset = get_dataset('test', exp_dict['dataset'])

    # train and val loader
    train_loader = DataLoader(
        train_dataset,
        batch_size=exp_dict['batch_size'],
        shuffle=True,
        # identity collate keeps episodes intact when batch_size == 1
        collate_fn=(lambda x: x)
        if exp_dict['batch_size'] == 1 else default_collate,
        num_workers=args.num_workers)  # 'args' is the argparse namespace of the surrounding script
    val_loader = DataLoader(
        val_dataset,
        batch_size=exp_dict['batch_size'],
        collate_fn=(lambda x: x)
        if exp_dict['batch_size'] == 1 else default_collate,
        shuffle=True,
        num_workers=args.num_workers)

    # Model
    # -----------
    model = get_model(exp_dict)

    # Checkpoint
    # -----------
    model_path = os.path.join(savedir, "model.pth")
    score_list_path = os.path.join(savedir, "score_list.pkl")

    if os.path.exists(score_list_path):
        # resume experiment
        model.set_state_dict(hu.torch_load(model_path))
        score_list = hu.load_pkl(score_list_path)
        s_epoch = score_list[-1]['epoch'] + 1
    else:
        # restart experiment
        score_list = []
        s_epoch = 0

    # Train & Val
    # ------------
    print("Starting experiment at epoch %d" % (s_epoch))

    for e in range(s_epoch, exp_dict['max_epoch']):
        score_dict = {}

        # Train the model
        score_dict.update(model.train_on_loader(train_loader))

        # Validate the model
        savepath = os.path.join(savedir_base, exp_dict['dataset']['name'])
        score_dict.update(model.val_on_loader(val_loader, savedir=savepath))
        model.on_train_end(savedir=savedir, epoch=e)
        score_dict["epoch"] = e

        # Visualize the model
        # model.vis_on_loader(vis_loader, savedir=savedir+"/images/")

        # Add to score_list and save checkpoint
        score_list += [score_dict]

        # Report & Save
        score_df = pd.DataFrame(score_list)
        print("\n", score_df.tail())
        hu.torch_save(model_path, model.get_state_dict())
        hu.save_pkl(score_list_path, score_list)
        print("Checkpoint Saved: %s" % savedir)

    print('experiment completed')
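
A minimal launcher sketch for trainval(), assuming it lives in the same script as a module-level argparse namespace named args (the data loaders above read args.num_workers). The save directory, dataset name, and exp_dict keys below are illustrative placeholders, not values from the original experiments; the real get_dataset and get_model may expect additional keys.

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--savedir_base', default='/tmp/experiments')  # hypothetical output root
    parser.add_argument('--num_workers', type=int, default=0)  # read globally inside trainval
    parser.add_argument('--reset', action='store_true')
    args = parser.parse_args()

    # illustrative exp_dict: only the keys that trainval itself reads are shown
    exp_dict = {
        'seed': 42,
        'dataset': {'name': 'placeholder_dataset'},  # hypothetical dataset spec
        'batch_size': 1,  # episodic setting, so the identity collate is used
        'max_epoch': 10,
    }

    trainval(exp_dict, savedir_base=args.savedir_base, reset=args.reset)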