def set_meta_record_keeper(self):
    """Create the record keeper that stores metrics aggregated over split schemes.

    ``self.beginning_of_training()`` is always called. Nothing else happens
    unless the split manager reports more than one split scheme name; in that
    case ``self.meta_record_keeper`` and ``self.meta_accuracies`` are set.
    """
    fresh_start = self.beginning_of_training()
    if len(self.split_manager.split_scheme_names) <= 1:
        return

    # Fill every sub-experiment directory template with the "meta_logs" name.
    meta_dirs = {}
    for kind, template in self.sub_experiment_dirs.items():
        meta_dirs[kind] = template % (self.experiment_folder, "meta_logs")
    csv_dir = meta_dirs["csvs"]
    tensorboard_dir = meta_dirs["tensorboard"]

    keeper, _, _ = logging_presets.get_record_keeper(
        csv_dir,
        tensorboard_dir,
        self.global_db_path,
        self.args.experiment_name,
        fresh_start,
    )
    self.meta_record_keeper = keeper
    # Three levels of auto-created dicts; the innermost level is a plain dict.
    self.meta_accuracies = defaultdict(
        lambda: defaultdict(lambda: defaultdict(dict)))
示例#2
0
    def run(self):
        """Run every pending Bayesian-optimization iteration, then evaluate the best one.

        Iterations whose trial already exists with status ``COMPLETED`` are
        skipped. After the loop the best parameters are saved, and the best
        sub-experiment is tested, reproduced and summarized in an accuracy
        report.
        """
        ax_client = self.get_ax_client()
        # Snapshot of the trials mapping taken once, before any new trial runs.
        existing_trials = ax_client.experiment.trials
        record_keeper, _, _ = logging_presets.get_record_keeper(
            self.csv_folder, self.tensorboard_folder)
        config_diff_reader = self.read_yaml_and_find_bayes(
            find_bayes_params=False)

        for iteration in range(self.bayes_opt_iters):
            already_completed = (
                iteration in existing_trials
                and existing_trials[iteration].status == TrialStatus.COMPLETED
            )
            if already_completed:
                continue

            logging.info("Optimization iteration %d" % iteration)
            # Save config diffs, if any.
            c_f.save_config_files(
                self.YR.args.place_to_save_configs,
                config_diff_reader.args.dict_of_yamls,
                True,
                [iteration],
            )
            sub_experiment_name = self.get_sub_experiment_name(iteration)
            trial_setup = self.get_parameters_and_trial_index(
                ax_client, sub_experiment_name, iteration)
            parameters, trial_index, experiment_func = trial_setup
            outcome = experiment_func(parameters, sub_experiment_name)
            ax_client.complete_trial(trial_index=trial_index, raw_data=outcome)

            self.save_new_log(ax_client)
            self.update_records(record_keeper, ax_client, iteration)
            self.plot_progress(ax_client)

        logging.info("DONE BAYESIAN OPTIMIZATION")
        self.plot_progress(ax_client)
        best_sub_experiment_name = self.save_best_parameters(
            record_keeper, ax_client)
        for follow_up in (self.test_model,
                          self.reproduce_results,
                          self.create_accuracy_report):
            follow_up(best_sub_experiment_name)
        logging.info("##### FINISHED #####")
    def run(self):
        """Resume Bayesian optimization after the trials already recorded, then evaluate.

        The loop starts at the number of trials the Ax client already holds
        and runs up to ``self.bayes_opt_iters``. Afterwards the best
        parameters are saved and the best sub-experiment is tested,
        reproduced and summarized in an accuracy report.
        """
        ax_client = self.get_ax_client()
        if ax_client.experiment.trials:
            num_explored_points = len(ax_client.experiment.trials)
        else:
            num_explored_points = 0
        # NOTE(review): computed but never read anywhere in this method.
        is_new_experiment = num_explored_points == 0
        record_keeper, _, _ = logging_presets.get_record_keeper(
            self.csv_folder, self.tensorboard_folder)

        for iteration in range(num_explored_points, self.bayes_opt_iters):
            logging.info("Optimization iteration %d" % iteration)
            sub_experiment_name = self.get_sub_experiment_name(iteration)
            trial_setup = self.get_parameters_and_trial_index(
                ax_client, sub_experiment_name)
            parameters, trial_index, experiment_func = trial_setup
            outcome = experiment_func(parameters, sub_experiment_name)
            ax_client.complete_trial(trial_index=trial_index, raw_data=outcome)

            self.save_new_log(ax_client)
            self.update_records(record_keeper, ax_client, iteration)
            self.plot_progress(ax_client)

        logging.info("DONE BAYESIAN OPTIMIZATION")
        self.plot_progress(ax_client)
        best_sub_experiment_name = self.save_best_parameters(
            record_keeper, ax_client)
        for follow_up in (self.test_model,
                          self.reproduce_results,
                          self.create_accuracy_report):
            follow_up(best_sub_experiment_name)
        logging.info("##### FINISHED #####")
示例#4
0
    def _train(self) -> Optional[float]:
        """Train with ``trainers.MetricLossOnly`` and test on the validation set each epoch.

        Logs go to ``example_logs`` / ``example_tensorboard`` and models to
        ``example_saved_models``. After every test pass a UMAP scatter plot is
        written to ``./plots/metric_<first extra hook argument>.png``.

        Returns:
            Nothing is returned explicitly.
            NOTE(review): the signature promises ``Optional[float]``; confirm
            whether callers expect a metric value here.
        """
        # Function-scope import: the file's import block is not visible here.
        import os

        record_keeper, _, _ = logging_presets.get_record_keeper(
            "example_logs", "example_tensorboard")
        hooks = logging_presets.get_hook_container(record_keeper)
        dataset_dict = {"val": self.val_dataset}
        model_folder = "example_saved_models"

        def visualizer_hook(umapper, umap_embeddings, labels, split_name,
                            keyname, *args):
            """Save a scatter plot of the 2-D UMAP embeddings, one color per label."""
            logging.info("UMAP plot for the {} split and label set {}".format(
                split_name, keyname))
            label_set = np.unique(labels)
            num_classes = len(label_set)
            fig = plt.figure(figsize=(20, 15))
            try:
                plt.gca().set_prop_cycle(
                    cycler("color", [
                        plt.cm.nipy_spectral(i)
                        for i in np.linspace(0, 0.9, num_classes)
                    ]))
                for i in range(num_classes):
                    idx = labels == label_set[i]
                    plt.plot(umap_embeddings[idx, 0],
                             umap_embeddings[idx, 1],
                             ".",
                             markersize=1)
                # presumably args[0] is the epoch passed by the tester — TODO confirm
                file_name = './plots/metric_{0}.png'.format(args[0])
                # savefig does not create missing directories.
                os.makedirs(os.path.dirname(file_name), exist_ok=True)
                plt.savefig(file_name, dpi=300)
            finally:
                # The hook runs after every test pass; without closing, each
                # call would leave another large figure alive in pyplot.
                plt.close(fig)

        # Create the tester
        tester = testers.GlobalEmbeddingSpaceTester(
            end_of_testing_hook=hooks.end_of_testing_hook,
            visualizer=umap.UMAP(),
            visualizer_hook=visualizer_hook,
            dataloader_num_workers=32)

        end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                    dataset_dict,
                                                    model_folder,
                                                    test_interval=1,
                                                    patience=200)

        trainer = trainers.MetricLossOnly(
            self.models_dict,
            self.optimizers,
            self._train_cfg.batch_per_gpu,
            self.loss_funcs,
            self.mining_funcs,
            self.train_set,
            sampler=self.sampler,
            dataloader_num_workers=self._train_cfg.workers - 1,
            end_of_iteration_hook=hooks.end_of_iteration_hook,
            end_of_epoch_hook=end_of_epoch_hook)

        # NOTE(review): the epoch count is hard-coded; self._train_cfg.epochs
        # is not used. Confirm this is intentional.
        trainer.train(num_epochs=500)
示例#5
0
 def set_record_keeper(self):
     """Create ``self.record_keeper`` for the current split.

     The experiment is flagged as new only when ``beginning_of_training()``
     is truthy and ``self.curr_split_count`` equals 0.
     """
     is_new_experiment = self.beginning_of_training() and self.curr_split_count == 0
     keeper_options = dict(
         csv_folder=self.csv_folder,
         tensorboard_folder=self.tensorboard_folder,
         global_db_path=self.global_db_path,
         experiment_name=self.args.experiment_name,
         is_new_experiment=is_new_experiment,
         save_figures=self.args.save_figures_on_tensorboard,
         save_lists=self.args.save_lists_in_db,
     )
     keeper, _, _ = logging_presets.get_record_keeper(**keeper_options)
     self.record_keeper = keeper
示例#6
0
 def delete_sub_experiment_folder(self, sub_experiment_name):
     """Remove a sub-experiment's folder and its entry in the global database.

     Args:
         sub_experiment_name: Name of the sub-experiment to wipe.
     """
     logging.warning(
         "Deleting and starting fresh for %s" % sub_experiment_name)
     folder_to_remove = self.get_sub_experiment_path(sub_experiment_name)
     shutil.rmtree(folder_to_remove)

     # A record keeper is built here only to reach the global database handle.
     keeper, _, _ = logging_presets.get_record_keeper(
         self.csv_folder,
         self.tensorboard_folder,
         self.global_db_path,
         sub_experiment_name,
         False,
     )
     global_db = keeper.record_writer.global_db
     global_db.delete_experiment(sub_experiment_name)
示例#7
0
 def set_meta_record_keeper(self):
     """Create the meta record keeper when more than one split scheme exists.

     ``self.beginning_of_training()`` is always called. With a single split
     scheme (or none) no attribute is set.
     """
     fresh_start = self.beginning_of_training()
     if len(self.split_manager.split_scheme_names) <= 1:
         return

     meta_paths = []
     for template in self.sub_experiment_dirs:
         meta_paths.append(template % (self.experiment_folder, "meta_logs"))
     # Exactly three templates are expected; the first one is not needed here.
     _, csv_dir, tensorboard_dir = meta_paths

     keeper, _, _ = logging_presets.get_record_keeper(
         csv_dir,
         tensorboard_dir,
         self.db_path,
         self.args.experiment_name,
         fresh_start,
     )
     self.meta_record_keeper = keeper
     self.meta_accuracies = defaultdict(lambda: defaultdict(dict))
示例#8
0
 def set_meta_record_keeper(self):
     """Create ``self.meta_record_keeper`` and reset ``self.meta_accuracies``.

     Unlike a guarded variant, this runs unconditionally, whatever the number
     of split schemes.
     """
     fresh_start = self.beginning_of_training()

     # Fill every sub-experiment directory template with the "meta_logs" name.
     meta_dirs = {}
     for kind, template in self.sub_experiment_dirs.items():
         meta_dirs[kind] = template % (self.experiment_folder, "meta_logs")
     csv_dir = meta_dirs["csvs"]
     tensorboard_dir = meta_dirs["tensorboard"]

     keeper_options = dict(
         csv_folder=csv_dir,
         tensorboard_folder=tensorboard_dir,
         global_db_path=self.global_db_path,
         experiment_name=self.args.experiment_name,
         is_new_experiment=fresh_start,
         save_figures=self.args.save_figures_on_tensorboard,
         save_lists=self.args.save_lists_in_db,
     )
     keeper, _, _ = logging_presets.get_record_keeper(**keeper_options)
     self.meta_record_keeper = keeper
     self.meta_accuracies = defaultdict(
         lambda: defaultdict(lambda: defaultdict(dict)))
示例#9
0
def get_testing_hooks(experiment_id, val_dataset, test_interval, patience):
    """Build the end-of-epoch and end-of-iteration hooks for one experiment.

    CSV logs and saved models share ``experiment_logs/<experiment_id>``;
    tensorboard data goes to ``experiment_logs/tensorboard/<experiment_id>``.

    Args:
        experiment_id: Name used for the per-experiment sub-folders.
        val_dataset: Dataset registered with the tester under the ``"val"`` key.
        test_interval: Forwarded to ``hooks.end_of_epoch_hook``.
        patience: Forwarded to ``hooks.end_of_epoch_hook``.

    Returns:
        Tuple ``(end_of_epoch_hook, end_of_iteration_hook)``.
    """
    experiment_dir = os.path.join('experiment_logs', experiment_id)
    # Built once and shared by the record keeper and the embedding writer.
    tensorboard_dir = os.path.join('experiment_logs', 'tensorboard',
                                   experiment_id)
    record_keeper, _, _ = logging_presets.get_record_keeper(
        experiment_dir, tensorboard_dir)
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"val": val_dataset}
    model_folder = experiment_dir

    def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname,
                        *args):
        """Show a UMAP scatter plot and log the embeddings to tensorboard."""
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        fig = plt.figure(figsize=(20, 15))
        try:
            plt.gca().set_prop_cycle(
                cycler("color", [
                    plt.cm.nipy_spectral(i)
                    for i in np.linspace(0, 0.9, num_classes)
                ]))
            for i in range(num_classes):
                idx = labels == label_set[i]
                plt.plot(umap_embeddings[idx, 0],
                         umap_embeddings[idx, 1],
                         ".",
                         markersize=1)
            plt.show()
        finally:
            # The hook runs on every test pass; close the figure so pyplot
            # does not keep accumulating open figures.
            plt.close(fig)

        writer = SummaryWriter(log_dir=tensorboard_dir)
        try:
            writer.add_embedding(umap_embeddings, metadata=labels)
        finally:
            # Close the writer even if add_embedding fails.
            writer.close()

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=6)
    end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                dataset_dict,
                                                model_folder,
                                                test_interval=test_interval,
                                                patience=patience)
    return end_of_epoch_hook, hooks.end_of_iteration_hook
    def create_accuracy_report(self, best_sub_experiment_name):
        """Write detailed and summarized test accuracies for the best sub-experiment.

        Every folder under ``self.bayes_opt_root_experiment_folder`` whose name
        starts with ``best_sub_experiment_name`` is treated as one experiment.
        For each evaluation type the newest trained and untrained rows are
        fetched from the global database, stored in the detailed report, and
        their accuracy columns are summarized by mean and 95% confidence
        interval.

        Args:
            best_sub_experiment_name: Name prefix of the experiments to report on.
        """
        keeper_bundle = logging_presets.get_record_keeper(
            self.csv_folder, self.tensorboard_folder, self.global_db_path, "", False)
        global_record_keeper, _, _ = keeper_bundle
        pattern = os.path.join(self.bayes_opt_root_experiment_folder,
                               "%s*" % best_sub_experiment_name)
        exp_names = list(map(os.path.basename, glob.glob(pattern)))

        # Columns starting with any of these prefixes are bookkeeping, not accuracies.
        skipped_prefixes = ("is_trained", "best_epoch", "best_accuracy", "SEM",
                            "id", "experiment_id", "timestamp")
        results = {}
        summary = {}

        for eval_type in ("meta", "meta_ConcatenateEmbeddings"):
            per_experiment = {}
            aggregated = collections.defaultdict(
                lambda: collections.defaultdict(list))
            results[eval_type] = per_experiment
            summary[eval_type] = aggregated
            table_name = self.eval_record_group_dicts[eval_type]["test"]

            for exp in exp_names:
                per_experiment[exp] = {}
                exp_id = global_record_keeper.record_writer.global_db.get_experiment_id(exp)
                row_query = "SELECT * FROM %s WHERE experiment_id=? AND id=? AND is_trained=?" % table_name
                latest_id_query = "SELECT max(id) FROM %s WHERE experiment_id=? AND is_trained=?" % table_name

                # Newest row for the trained and the untrained model, when present.
                latest_rows = {}
                for label, trained_flag in (("trained", 1), ("untrained", 0)):
                    id_rows = global_record_keeper.query(
                        latest_id_query,
                        values=(exp_id, trained_flag),
                        use_global_db=True)
                    latest_id = id_rows[0]["max(id)"]
                    rows = global_record_keeper.query(
                        row_query,
                        values=(exp_id, latest_id, trained_flag),
                        use_global_db=True)
                    if len(rows) > 0:
                        latest_rows[label] = rows[0]

                for label, row in latest_rows.items():
                    row_dict = dict(row)
                    per_experiment[exp][label] = row_dict
                    for column, value in row_dict.items():
                        if not column.startswith(skipped_prefixes):
                            aggregated[label][column].append(value)

            # Replace each list of samples with its summary statistics.
            for label, per_metric in aggregated.items():
                for column in list(per_metric):
                    samples = per_metric[column]
                    mean = np.mean(samples)
                    # https://stackoverflow.com/a/34474255
                    cf_low, cf_high = scipy_stats.t.interval(
                        0.95,
                        len(samples) - 1,
                        loc=mean,
                        scale=scipy_stats.sem(samples))
                    cf_width = mean - cf_low
                    per_metric[column] = {
                        "mean": float(mean),
                        "95%_confidence_interval": (float(cf_low), float(cf_high)),
                        "95%_confidence_interval_width": float(cf_width),
                    }

        c_f.write_yaml(self.accuracy_report_detailed_filename, results, open_as="w")
        # The JSON round trip turns the nested defaultdicts into plain dicts.
        c_f.write_yaml(self.accuracy_report_filename, json.loads(json.dumps(summary)), open_as="w")
示例#11
0
    print("Use following best parameter:")
    print(best_params)

    param_gen = ParameterGenerator(best_trial,
                                   CONF["_fix_params"],
                                   logger=logging.getLogger())

constructors = MODEL_DEF.get(CONF, best_trial, param_gen)

train_dataset, dev_dataset, train_sampler, batch_size = \
    next(constructors["fold_generator"]())
trainer_kwargs = constructors["modules"]()

# logging
record_keeper, _, _ = logging_presets.get_record_keeper(
    csv_folder=os.path.join(args.log_dir, f"csv"),
    tensorboard_folder=os.path.join(args.log_dir, f"tensorboard"))
hooks = logging_presets.get_hook_container(record_keeper)

# tester
tester = testers.GlobalEmbeddingSpaceTester(
    end_of_testing_hook=hooks.end_of_testing_hook, dataloader_num_workers=32)
end_of_epoch_hook = hooks.end_of_epoch_hook(tester, {"val": dev_dataset},
                                            os.path.join(
                                                args.log_dir, f"model"),
                                            test_interval=1,
                                            patience=args.patience)

# train
if args.trainer == "MetricLossOnly":
    trainer = trainers.MetricLossOnly(
示例#12
0
    def test_metric_loss_only(self):
        # End-to-end check of MetricLossOnly together with the logging hooks:
        # trains a CIFAR resnet20 trunk with NTXentLoss on a CIFAR100 subset for
        # every dtype in TEST_DTYPES and for two `splits_to_eval` settings, then
        # verifies what the hook container reports about accuracies, losses and
        # record-group names. All temporary folders are removed on success.

        cifar_resnet_folder = "temp_cifar_resnet_for_pytorch_metric_learning_test"
        dataset_folder = "temp_dataset_for_pytorch_metric_learning_test"
        model_folder = "temp_saved_models_for_pytorch_metric_learning_test"
        logs_folder = "temp_logs_for_pytorch_metric_learning_test"
        tensorboard_folder = "temp_tensorboard_for_pytorch_metric_learning_test"

        # The cloned repository is later imported as a package (see the
        # `from temp_cifar_resnet_... import resnet` line inside the loop).
        os.system(
            "git clone https://github.com/akamaster/pytorch_resnet_cifar10.git {}"
            .format(cifar_resnet_folder))

        loss_fn = NTXentLoss()

        normalize_transform = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                                   std=[0.229, 0.224, 0.225])

        train_transform = transforms.Compose([
            transforms.RandomHorizontalFlip(),
            transforms.RandomCrop(32, 4),
            transforms.ToTensor(),
            normalize_transform,
        ])

        eval_transform = transforms.Compose(
            [transforms.ToTensor(), normalize_transform])

        # Refuse to run if leftovers of a previous run exist, because these
        # folders are deleted at the end of the test.
        assert not os.path.isdir(dataset_folder)
        assert not os.path.isdir(model_folder)
        assert not os.path.isdir(logs_folder)
        assert not os.path.isdir(tensorboard_folder)

        # Only the first 10000 samples of each dataset are used.
        subset_idx = np.arange(10000)

        train_dataset = datasets.CIFAR100(dataset_folder,
                                          train=True,
                                          download=True,
                                          transform=train_transform)

        # Same training images, but with the evaluation transform.
        train_dataset_for_eval = datasets.CIFAR100(dataset_folder,
                                                   train=True,
                                                   download=True,
                                                   transform=eval_transform)

        val_dataset = datasets.CIFAR100(dataset_folder,
                                        train=False,
                                        download=True,
                                        transform=eval_transform)

        train_dataset = torch.utils.data.Subset(train_dataset, subset_idx)
        train_dataset_for_eval = torch.utils.data.Subset(
            train_dataset_for_eval, subset_idx)
        val_dataset = torch.utils.data.Subset(val_dataset, subset_idx)

        for dtype in TEST_DTYPES:
            # None means the default evaluation; the explicit list evaluates
            # each split against the combination of train and val.
            for splits_to_eval in [
                    None,
                [("train", ["train", "val"]), ("val", ["train", "val"])],
            ]:
                from temp_cifar_resnet_for_pytorch_metric_learning_test import resnet

                model = torch.nn.DataParallel(resnet.resnet20())
                checkpoint = torch.load(
                    "{}/pretrained_models/resnet20-12fca82f.th".format(
                        cifar_resnet_folder),
                    map_location=TEST_DEVICE,
                )
                model.load_state_dict(checkpoint["state_dict"])
                # Drop the classification layer so the model outputs embeddings.
                model.module.linear = c_f.Identity()
                # On CPU the DataParallel wrapper is removed again.
                if TEST_DEVICE == torch.device("cpu"):
                    model = model.module
                model = model.to(TEST_DEVICE).type(dtype)

                optimizer = torch.optim.Adam(
                    model.parameters(),
                    lr=0.0002,
                    weight_decay=0.0001,
                    eps=1e-04,
                )

                batch_size = 32
                # A single iteration per epoch when custom splits are evaluated.
                iterations_per_epoch = None if splits_to_eval is None else 1
                model_dict = {"trunk": model}
                optimizer_dict = {"trunk_optimizer": optimizer}
                loss_fn_dict = {"metric_loss": loss_fn}
                sampler = MPerClassSampler(
                    np.array(train_dataset.dataset.targets)[subset_idx],
                    m=4,
                    batch_size=32,
                    length_before_new_iter=len(train_dataset),
                )

                record_keeper, _, _ = logging_presets.get_record_keeper(
                    logs_folder, tensorboard_folder)
                hooks = logging_presets.get_hook_container(
                    record_keeper, primary_metric="precision_at_1")
                dataset_dict = {
                    "train": train_dataset_for_eval,
                    "val": val_dataset
                }

                tester = GlobalEmbeddingSpaceTester(
                    end_of_testing_hook=hooks.end_of_testing_hook,
                    accuracy_calculator=accuracy_calculator.AccuracyCalculator(
                        include=("precision_at_1", "AMI"), k=1),
                    data_device=TEST_DEVICE,
                    dtype=dtype,
                    dataloader_num_workers=32,
                )

                end_of_epoch_hook = hooks.end_of_epoch_hook(
                    tester,
                    dataset_dict,
                    model_folder,
                    test_interval=1,
                    patience=1,
                    splits_to_eval=splits_to_eval,
                )

                trainer = MetricLossOnly(
                    models=model_dict,
                    optimizers=optimizer_dict,
                    batch_size=batch_size,
                    loss_funcs=loss_fn_dict,
                    mining_funcs={},
                    dataset=train_dataset,
                    sampler=sampler,
                    data_device=TEST_DEVICE,
                    dtype=dtype,
                    dataloader_num_workers=32,
                    iterations_per_epoch=iterations_per_epoch,
                    freeze_trunk_batchnorm=True,
                    end_of_iteration_hook=hooks.end_of_iteration_hook,
                    end_of_epoch_hook=end_of_epoch_hook,
                )

                num_epochs = 3
                trainer.train(num_epochs=num_epochs)
                best_epoch, best_accuracy = hooks.get_best_epoch_and_accuracy(
                    tester, "val")
                # Epoch and accuracy thresholds are only checked for the
                # default evaluation setting.
                if splits_to_eval is None:
                    self.assertTrue(best_epoch == 3)
                    self.assertTrue(best_accuracy > 0.2)

                # The different hook accessors must agree on the best accuracy.
                accuracies, primary_metric_key = hooks.get_accuracies_of_best_epoch(
                    tester, "val")
                accuracies = c_f.sqliteObjToDict(accuracies)
                self.assertTrue(
                    accuracies[primary_metric_key][0] == best_accuracy)
                self.assertTrue(primary_metric_key == "precision_at_1_level0")

                best_epoch_accuracies = hooks.get_accuracies_of_epoch(
                    tester, "val", best_epoch)
                best_epoch_accuracies = c_f.sqliteObjToDict(
                    best_epoch_accuracies)
                self.assertTrue(best_epoch_accuracies[primary_metric_key][0] ==
                                best_accuracy)

                accuracy_history = hooks.get_accuracy_history(tester, "val")
                self.assertTrue(accuracy_history[primary_metric_key][
                    accuracy_history["epoch"].index(best_epoch)] ==
                                best_accuracy)

                # One loss entry is expected per training iteration.
                loss_history = hooks.get_loss_history()
                if splits_to_eval is None:
                    self.assertTrue(
                        len(loss_history["metric_loss"]) == (len(sampler) /
                                                             batch_size) *
                        num_epochs)

                curr_primary_metric = hooks.get_curr_primary_metric(
                    tester, "val")
                self.assertTrue(curr_primary_metric ==
                                accuracy_history[primary_metric_key][-1])

                base_record_group_name = hooks.base_record_group_name(tester)

                self.assertTrue(
                    base_record_group_name ==
                    "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0")

                record_group_name = hooks.record_group_name(tester, "val")

                # The record-group suffix reflects which splits were compared.
                if splits_to_eval is None:
                    self.assertTrue(
                        record_group_name ==
                        "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0_VAL_vs_self"
                    )
                else:
                    self.assertTrue(
                        record_group_name ==
                        "accuracies_normalized_GlobalEmbeddingSpaceTester_level_0_VAL_vs_TRAIN_and_VAL"
                    )

                # Per-configuration outputs are removed so the next loop
                # iteration starts from empty folders.
                shutil.rmtree(model_folder)
                shutil.rmtree(logs_folder)
                shutil.rmtree(tensorboard_folder)

        shutil.rmtree(cifar_resnet_folder)
        shutil.rmtree(dataset_folder)
示例#13
0
 def set_meta_record_keeper(self):
     """Create the meta record keeper, pickler/CSV writer and tensorboard writer.

     Does nothing unless the split manager reports more than one split
     scheme name. The two log folders are created after the record keeper
     has been built.
     """
     if len(self.split_manager.split_scheme_names) <= 1:
         return

     meta_paths = []
     for template in self.sub_experiment_dirs:
         meta_paths.append(template % (self.experiment_folder, "meta_logs"))
     # Exactly three templates are expected; the first one is not needed here.
     _, pkl_dir, tensorboard_dir = meta_paths

     keeper, pickler_and_csver, tensorboard_writer = logging_presets.get_record_keeper(
         pkl_dir, tensorboard_dir)
     self.meta_record_keeper = keeper
     self.meta_pickler_and_csver = pickler_and_csver
     self.meta_tensorboard_writer = tensorboard_writer
     self.meta_accuracies = defaultdict(lambda: defaultdict(dict))

     for folder in (pkl_dir, tensorboard_dir):
         c_f.makedir_if_not_there(folder)
示例#14
0
 def set_record_keeper(self):
     """Store the record keeper, pickler/CSV writer and tensorboard writer on ``self``."""
     keeper, pickler_and_csver, tensorboard_writer = logging_presets.get_record_keeper(
         self.pkl_folder, self.tensorboard_folder)
     self.record_keeper = keeper
     self.pickler_and_csver = pickler_and_csver
     self.tensorboard_writer = tensorboard_writer
示例#15
0
def train_app(cfg):
    """Train a trunk + embedder + classifier model as described by ``cfg``.

    Builds a torchvision trunk with its ``fc`` layer replaced by ``Identity``,
    an MLP embedder and an MLP classifier, then trains them with
    ``trainers.TrainWithClassifier`` using the metric loss, miner, optimizer
    and step schedulers selected in the config. Logs, tensorboard data and
    saved models are stored under a name derived from the config values.

    Args:
        cfg: Config object exposing the ``model``, ``embedder``, ``optimizer``,
            ``dataset``, ``mode``, ``embedder_loss``, ``miner``, ``trainer``,
            ``loss``, ``scheduler`` and ``transform`` sections read below.

    Raises:
        ValueError: If the optimizer, embedder loss or miner name is not one
            of the supported values.
        KeyError: If the ``DATASET_FOLDER`` environment variable is not set.
    """
    print(cfg.pretty())

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace the softmax layer with an identity function
    trunk = torchvision.models.__dict__[cfg.model.model_name](pretrained=cfg.model.pretrained)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))

    embedder = torch.nn.DataParallel(MLP([trunk_output_size, cfg.embedder.size]).to(device))
    classifier = torch.nn.DataParallel(MLP([cfg.embedder.size, cfg.embedder.class_out_size])).to(device)

    # Set optimizers. "sdg" is the spelling existing configs use; the correct
    # "sgd" is accepted as well.
    if cfg.optimizer.name in ("sdg", "sgd"):
        optimizer_cls = torch.optim.SGD
    elif cfg.optimizer.name == "rmsprop":
        optimizer_cls = torch.optim.RMSprop
    else:
        raise ValueError("Unsupported optimizer name: %r" % (cfg.optimizer.name,))

    def build_optimizer(module):
        """Create one optimizer of the configured class for ``module``."""
        return optimizer_cls(module.parameters(),
                             lr=cfg.optimizer.lr,
                             momentum=cfg.optimizer.momentum,
                             weight_decay=cfg.optimizer.weight_decay)

    trunk_optimizer = build_optimizer(trunk)
    embedder_optimizer = build_optimizer(embedder)
    classifier_optimizer = build_optimizer(classifier)

    # Set the datasets
    data_dir = os.environ["DATASET_FOLDER"]+"/"+cfg.dataset.data_dir
    print("Data dir: "+data_dir)

    train_dataset, val_dataset, val_samples_dataset = get_datasets(data_dir, cfg, mode=cfg.mode.type)
    print("Trainset: ",len(train_dataset), "Testset: ",len(val_dataset), "Samplesset: ",len(val_samples_dataset))

    # Set the loss function
    if cfg.embedder_loss.name == "margin_loss":
        loss = losses.MarginLoss(margin=cfg.embedder_loss.margin,nu=cfg.embedder_loss.nu,beta=cfg.embedder_loss.beta)
    elif cfg.embedder_loss.name == "triplet_margin":
        loss = losses.TripletMarginLoss(margin=cfg.embedder_loss.margin)
    elif cfg.embedder_loss.name == "multi_similarity":
        loss = losses.MultiSimilarityLoss(alpha=cfg.embedder_loss.alpha, beta=cfg.embedder_loss.beta, base=cfg.embedder_loss.base)
    else:
        raise ValueError("Unsupported embedder loss name: %r" % (cfg.embedder_loss.name,))

    # Set the classification loss:
    classification_loss = torch.nn.CrossEntropyLoss()

    # Set the mining function
    if cfg.miner.name == "triplet_margin":
        miner = miners.TripletMarginMiner(margin=cfg.miner.margin)
    elif cfg.miner.name == "multi_similarity":
        miner = miners.MultiSimilarityMiner(epsilon=cfg.miner.epsilon)
    else:
        raise ValueError("Unsupported miner name: %r" % (cfg.miner.name,))

    batch_size = cfg.trainer.batch_size
    num_epochs = cfg.trainer.num_epochs
    # NOTE(review): read from the config but never passed to the trainer.
    iterations_per_epoch = cfg.trainer.iterations_per_epoch
    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_dataset.targets, m=4, length_before_new_iter=len(train_dataset))

    # Package the above stuff into dictionaries.
    models = {"trunk": trunk, "embedder": embedder, "classifier": classifier}
    optimizers = {"trunk_optimizer": trunk_optimizer, "embedder_optimizer": embedder_optimizer, "classifier_optimizer": classifier_optimizer}
    loss_funcs = {"metric_loss": loss, "classifier_loss": classification_loss}
    mining_funcs = {"tuple_miner": miner}

    # We can specify loss weights if we want to. This is optional
    loss_weights = {"metric_loss": cfg.loss.metric_loss, "classifier_loss": cfg.loss.classifier_loss}

    # One step scheduler per optimizer. The trunk scheduler must wrap the trunk
    # optimizer; wrapping the embedder optimizer twice would decay the embedder
    # learning rate twice per step and never decay the trunk's.
    schedulers = {
            "embedder_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(embedder_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma),
            "classifier_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(classifier_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma),
            "trunk_scheduler_by_epoch": torch.optim.lr_scheduler.StepLR(trunk_optimizer, cfg.scheduler.step_size, gamma=cfg.scheduler.gamma),
            }

    # The experiment name doubles as the log, tensorboard and model folder name.
    experiment_name = "%s_model_%s_cl_%s_ml_%s_miner_%s_mix_ml_%02.2f_mix_cl_%02.2f_resize_%d_emb_size_%d_class_size_%d_opt_%s_lr_%02.2f_m_%02.2f_wd_%02.2f"%(
        cfg.dataset.name,
        cfg.model.model_name,
        "cross_entropy",
        cfg.embedder_loss.name,
        cfg.miner.name,
        cfg.loss.metric_loss,
        cfg.loss.classifier_loss,
        cfg.transform.transform_resize,
        cfg.embedder.size,
        cfg.embedder.class_out_size,
        cfg.optimizer.name,
        cfg.optimizer.lr,
        cfg.optimizer.momentum,
        cfg.optimizer.weight_decay)
    record_keeper, _, _ = logging_presets.get_record_keeper("logs/%s"%(experiment_name), "tensorboard/%s"%(experiment_name))
    hooks = logging_presets.get_hook_container(record_keeper)
    dataset_dict = {"samples": val_samples_dataset, "val": val_dataset}
    model_folder = "example_saved_models/%s/"%(experiment_name)

    # Create the tester
    tester = OneShotTester(
            end_of_testing_hook=hooks.end_of_testing_hook,
            )
    tester.embedding_filename=data_dir+"/"+experiment_name+".pkl"
    end_of_epoch_hook = hooks.end_of_epoch_hook(tester, dataset_dict, model_folder)
    # NOTE(review): dataloader_num_workers is set to the batch size, which looks
    # unintended; kept as-is because no worker-count setting is visible here.
    trainer = trainers.TrainWithClassifier(models,
            optimizers,
            batch_size,
            loss_funcs,
            mining_funcs,
            train_dataset,
            sampler=sampler,
            lr_schedulers=schedulers,
            dataloader_num_workers = cfg.trainer.batch_size,
            loss_weights=loss_weights,
            end_of_iteration_hook=hooks.end_of_iteration_hook,
            end_of_epoch_hook=end_of_epoch_hook
            )

    trainer.train(num_epochs=num_epochs)

    # NOTE(review): this fresh tester is created but not used in the visible code.
    tester = OneShotTester()
示例#16
0
def objective(trial):
    """Train one model per fold for a search trial and return the mean score.

    For every fold produced by ``constructors["fold_generator"]()`` a trainer
    is built, trained (resuming from ``trial_<n>_<fold>_last.pth`` in
    ``args.log_dir`` when that checkpoint exists), and the best validation
    ``mean_average_precision_at_r_level0`` recorded by the hooks is kept.

    Args:
        trial: Search trial object; ``trial.number`` is used to name the
            per-fold log, model and checkpoint paths.

    Returns:
        The mean over folds of the best validation score.

    Raises:
        ValueError: If ``args.trainer`` is not one of the supported trainer
            names.
    """
    # Fail fast on a bad trainer name. Previously `trainer` was simply left
    # unbound and the problem surfaced later as an UnboundLocalError.
    supported_trainers = ("MetricLossOnly", "TrainWithClassifier")
    if args.trainer not in supported_trainers:
        raise ValueError(
            f"Unsupported trainer {args.trainer!r}; expected one of "
            f"{supported_trainers}")

    param_gen = ParameterGenerator(trial, CONF["_fix_params"], logger=logger)

    # Average results of multiple folds.
    print("New parameter.")
    metrics = []
    constructors = MODEL_DEF.get(CONF, trial, param_gen)
    for i_fold, (train_dataset, dev_dataset, train_sampler,
                 batch_size) in enumerate(constructors["fold_generator"]()):
        print(f"Fold {i_fold}")
        trainer_kwargs = constructors["modules"]()

        # logging
        record_keeper, _, _ = logging_presets.get_record_keeper(
            csv_folder=os.path.join(args.log_dir,
                                    f"trial_{trial.number}_{i_fold}_csv"),
            tensorboard_folder=os.path.join(
                args.log_dir, f"trial_{trial.number}_{i_fold}_tensorboard"))
        hooks = logging_presets.get_hook_container(record_keeper)

        # tester
        tester = testers.GlobalEmbeddingSpaceTester(
            end_of_testing_hook=hooks.end_of_testing_hook,
            dataloader_num_workers=args.n_test_loader)
        end_of_epoch_hook = hooks.end_of_epoch_hook(
            tester, {"val": dev_dataset},
            os.path.join(args.log_dir, f"trial_{trial.number}_{i_fold}_model"),
            test_interval=1,
            patience=args.patience)

        CHECKPOINT_FN = os.path.join(
            args.log_dir, f"trial_{trial.number}_{i_fold}_last.pth")

        def actual_end_of_epoch_hook(trainer):
            # Run the library hook first, then checkpoint the model, optimizer
            # and loss state so an interrupted fold can resume.
            continue_training = end_of_epoch_hook(trainer)

            torch.save(
                ({k: m.state_dict()
                  for k, m in trainer.models.items()},
                 {k: m.state_dict()
                  for k, m in trainer.optimizers.items()},
                 {k: m.state_dict()
                  for k, m in trainer.loss_funcs.items()}, trainer.epoch),
                CHECKPOINT_FN)

            return continue_training

        # train
        # Both supported trainers take the same arguments, so the class is
        # looked up by its (already validated) name.
        trainer_cls = getattr(trainers, args.trainer)
        trainer = trainer_cls(
            batch_size=batch_size,
            mining_funcs={},
            dataset=train_dataset,
            sampler=train_sampler,
            dataloader_num_workers=args.n_train_loader,
            end_of_iteration_hook=hooks.end_of_iteration_hook,
            end_of_epoch_hook=actual_end_of_epoch_hook,
            **trainer_kwargs)

        while True:
            start_epoch = 1
            if os.path.exists(CHECKPOINT_FN):
                # Resume from the last completed epoch of this trial/fold.
                model_dicts, optimizer_dicts, loss_dicts, last_epoch = \
                    torch.load(CHECKPOINT_FN)
                for k, d in model_dicts.items():
                    trainer.models[k].load_state_dict(d)
                for k, d in optimizer_dicts.items():
                    trainer.optimizers[k].load_state_dict(d)
                for k, d in loss_dicts.items():
                    trainer.loss_funcs[k].load_state_dict(d)
                start_epoch = last_epoch + 1

                logger.critical(f"Start from old epoch: {last_epoch + 1}")
            try:
                trainer.train(num_epochs=args.max_epoch,
                              start_epoch=start_epoch)
            except Exception as err:
                logger.critical(f"Error: {err}")
                # NOTE(review): this looks inverted relative to the flag name
                # (the error is swallowed when `ignore_error` is false and
                # re-raised when it is true), and every path leaves the loop
                # on the first pass, so `while True` never retries. Logic kept
                # as-is because the flag's definition is not visible here -
                # confirm the intended behavior.
                if not args.ignore_error:
                    break
                # Bare `raise` keeps the original traceback intact.
                raise
            else:
                break

        rslt = hooks.get_accuracy_history(
            tester, "val", metrics=["mean_average_precision_at_r"])

        # Keep the best epoch of this fold.
        metrics.append(max(rslt["mean_average_precision_at_r_level0"]))
    return np.mean(metrics)
 def _create_general(self, record_keeper_type):
     """Build a record keeper from a dict of keyword arguments.

     ``record_keeper_type`` is unpacked as keyword arguments to
     ``logging_presets.get_record_keeper``. That call returns three values;
     only the first one (the record keeper) is returned.
     """
     keeper, _, _ = logging_presets.get_record_keeper(**record_keeper_type)
     return keeper
示例#18
0
def train(train_data, test_data, save_model, num_epochs, lr, embedding_size,
          batch_size):
    """Train a ResNet-18 trunk plus MLP embedder with a triplet margin loss.

    Args:
        train_data: Training dataset; its ``targets`` attribute is passed to
            the per-class sampler.
        test_data: Dataset evaluated as the ``"val"`` split after each epoch.
        save_model: Root directory for this run's outputs, or ``None`` to
            skip saving the weights. When given, a sub-directory named from
            ``lr``, ``batch_size`` and ``embedding_size`` is created under it
            and receives the logs and the final weights.
        num_epochs: Number of epochs passed to the trainer.
        lr: Learning rate of the embedder; the trunk uses ``lr / 10``.
        embedding_size: Output size of the embedder MLP.
        batch_size: Batch size passed to the trainer.
    """
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Set trunk model and replace the softmax layer with an identity function
    trunk = torchvision.models.resnet18(pretrained=True)
    trunk_output_size = trunk.fc.in_features
    trunk.fc = common_functions.Identity()
    trunk = torch.nn.DataParallel(trunk.to(device))

    # Set embedder model. This takes in the output of the trunk and outputs
    # `embedding_size`-dimensional embeddings
    embedder = torch.nn.DataParallel(
        MLP([trunk_output_size, embedding_size]).to(device))

    # Set optimizers
    trunk_optimizer = torch.optim.Adam(trunk.parameters(),
                                       lr=lr / 10,
                                       weight_decay=0.0001)
    embedder_optimizer = torch.optim.Adam(embedder.parameters(),
                                          lr=lr,
                                          weight_decay=0.0001)

    # Set the loss function
    loss = losses.TripletMarginLoss(margin=0.1)

    # Set the mining function
    miner = miners.MultiSimilarityMiner(epsilon=0.1)

    # Set the dataloader sampler
    sampler = samplers.MPerClassSampler(train_data.targets,
                                        m=4,
                                        length_before_new_iter=len(train_data))

    # `save_model=None` means "do not save". The original joined the path
    # unconditionally, which raised TypeError before the None check at the end
    # of the function could ever apply.
    if save_model is not None:
        run_name = (str(lr).replace('.', '') + '_' + str(batch_size) + '_' +
                    str(embedding_size))
        save_dir = os.path.join(save_model, run_name)
        os.makedirs(save_dir, exist_ok=True)
        log_root = save_dir
    else:
        save_dir = None
        # Joining with "" yields plain relative folder names.
        log_root = ""

    # Package the above stuff into dictionaries.
    models = {"trunk": trunk, "embedder": embedder}
    optimizers = {
        "trunk_optimizer": trunk_optimizer,
        "embedder_optimizer": embedder_optimizer
    }
    loss_funcs = {"metric_loss": loss}
    mining_funcs = {"tuple_miner": miner}

    record_keeper, _, _ = logging_presets.get_record_keeper(
        os.path.join(log_root, "example_logs"),
        os.path.join(log_root, "example_tensorboard"))
    hooks = logging_presets.get_hook_container(record_keeper)

    dataset_dict = {"val": test_data, "train": train_data}
    model_folder = "example_saved_models"

    def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname,
                        *args):
        # Scatter-plot the 2-D UMAP embeddings, one color per label.
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        plt.figure(figsize=(20, 15))
        # NOTE(review): `num_embeddings` is not defined in this function; it
        # must come from module scope or this raises NameError - confirm
        # (possibly `embedding_size` was intended).
        plt.title(str(split_name) + '_' + str(num_embeddings))
        plt.gca().set_prop_cycle(
            cycler("color", [
                plt.cm.nipy_spectral(i)
                for i in np.linspace(0, 0.9, num_classes)
            ]))
        for i in range(num_classes):
            idx = labels == label_set[i]
            plt.plot(umap_embeddings[idx, 0],
                     umap_embeddings[idx, 1],
                     ".",
                     markersize=1)
        plt.show()

    # Create the tester
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        visualizer=umap.UMAP(),
        visualizer_hook=visualizer_hook,
        dataloader_num_workers=32,
        accuracy_calculator=AccuracyCalculator(k="max_bin_count"))

    end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                dataset_dict,
                                                model_folder,
                                                test_interval=1,
                                                patience=1)

    trainer = trainers.MetricLossOnly(
        models,
        optimizers,
        batch_size,
        loss_funcs,
        mining_funcs,
        train_data,
        sampler=sampler,
        dataloader_num_workers=32,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)

    trainer.train(num_epochs=num_epochs)

    if save_dir is not None:

        torch.save(models["trunk"].state_dict(),
                   os.path.join(save_dir, 'trunk.pth'))
        torch.save(models["embedder"].state_dict(),
                   os.path.join(save_dir, 'embedder.pth'))

        print('Model saved in ', save_dir)
示例#19
0
    def create_accuracy_report(self, best_sub_experiment_name):
        """Write detailed and summary accuracy reports for a sub-experiment.

        Every entry in ``self.bayes_opt_root_experiment_folder`` whose name
        starts with ``best_sub_experiment_name`` is looked up in the global
        database. For each evaluation type and trained status, the row with
        the highest ``id`` in the "test" table is collected. The accuracy
        columns are then aggregated across experiments into a mean and a 95%
        interval computed with ``scipy_stats.t.interval``.

        Two YAML files are written to the root experiment folder:
        ``detailed_report_<eval_name>.yaml`` holds the collected rows and
        ``report_<eval_name>.yaml`` holds the aggregated summary.
        """
        yaml_reader = self.read_yaml_and_find_bayes(find_bayes_params=False,
                                                    merge_argparse=True)
        api_parser = self.get_api_parser(yaml_reader.args)
        record_groups = api_parser.get_eval_record_name_dict(return_all=True)
        keeper, _, _ = logging_presets.get_record_keeper(
            self.csv_folder, self.tensorboard_folder, self.global_db_path, "",
            False)

        name_pattern = os.path.join(self.bayes_opt_root_experiment_folder,
                                    "%s*" % best_sub_experiment_name)
        exp_names = [
            os.path.basename(path) for path in glob.glob(name_pattern)
        ]

        results = {}
        summary = {}
        eval_types = c_f.if_str_convert_to_singleton_list(
            yaml_reader.args.meta_testing_method)

        for eval_type in eval_types:
            results[eval_type] = {}
            summary[eval_type] = collections.defaultdict(
                lambda: collections.defaultdict(list))
            table_name = record_groups[eval_type]["test"]

            # The queries depend only on the table, so build them once per
            # evaluation type.
            status_col = const.TRAINED_STATUS_COL_NAME
            row_query = "SELECT * FROM {} WHERE experiment_id=? AND id=? AND {}=?".format(
                table_name, status_col)
            latest_id_query = "SELECT max(id) FROM {} WHERE experiment_id=? AND {}=?".format(
                table_name, status_col)
            # Columns starting with any of these prefixes are bookkeeping,
            # not accuracies, and are left out of the summary.
            skipped_prefixes = (status_col, "epoch", "SEM", "id",
                                "experiment_id", "timestamp")
            statuses = (const.UNTRAINED_TRUNK,
                        const.UNTRAINED_TRUNK_AND_EMBEDDER, const.TRAINED)

            for exp in exp_names:
                exp_rows = {}
                results[eval_type][exp] = exp_rows
                exp_id = keeper.record_writer.global_db.get_experiment_id(exp)

                # First pass: fetch the most recent row for each status.
                latest_rows = {}
                for status in statuses:
                    id_rows = keeper.query(latest_id_query,
                                           values=(exp_id, status),
                                           use_global_db=True)
                    latest_id = id_rows[0]["max(id)"]
                    matches = keeper.query(row_query,
                                           values=(exp_id, latest_id, status),
                                           use_global_db=True)
                    if len(matches) > 0:
                        latest_rows[status] = matches[0]

                # Second pass: record the rows and collect accuracy values.
                for status, row in latest_rows.items():
                    row_dict = dict(row)
                    exp_rows[status] = row_dict
                    for column, value in row_dict.items():
                        if column.startswith(skipped_prefixes):
                            continue
                        summary[eval_type][status][column].append(value)

            # Replace each list of values by its mean and 95% interval.
            for status, per_metric in summary[eval_type].items():
                for column in per_metric.keys():
                    values = per_metric[column]
                    mean = np.mean(values)
                    # https://stackoverflow.com/a/34474255
                    low, high = scipy_stats.t.interval(
                        0.95,
                        len(values) - 1,
                        loc=mean,
                        scale=scipy_stats.sem(values))
                    per_metric[column] = {
                        "mean": float(mean),
                        "95%_confidence_interval": (float(low), float(high)),
                        "95%_confidence_interval_width": float(mean - low)
                    }

        eval_name = c_f.first_val_of_dict(
            api_parser.get_eval_record_name_dict(
                eval_type=const.NON_META, return_base_record_group_name=True))
        root_folder = self.bayes_opt_root_experiment_folder
        detailed_report_filename = os.path.join(
            root_folder, "detailed_report_{}.yaml".format(eval_name))
        report_filename = os.path.join(root_folder,
                                       "report_{}.yaml".format(eval_name))
        c_f.write_yaml(detailed_report_filename, results, open_as="w")
        # The JSON round trip turns the nested defaultdicts into plain dicts
        # before dumping.
        c_f.write_yaml(report_filename,
                       json.loads(json.dumps(summary)),
                       open_as="w")
 def set_record_keeper(self):
     """Create the record keeper for this experiment and store it on ``self``.

     The experiment is flagged as new only when ``beginning_of_training()``
     is truthy and ``curr_split_count`` is 0.
     """
     starting_fresh = (self.beginning_of_training()
                       and self.curr_split_count == 0)
     keeper, _, _ = logging_presets.get_record_keeper(
         self.csv_folder,
         self.tensorboard_folder,
         self.global_db_path,
         self.args.experiment_name,
         starting_fresh,
     )
     self.record_keeper = keeper
示例#21
0
    "synth_loss": synth_loss,
    "g_adv_loss": g_adv_loss
}

# Miners are passed to the trainer by name; only a tuple miner is used here.
mining_funcs = dict(tuple_miner=miner)

# Weight applied to each named loss term.
loss_weights = dict(
    metric_loss=1,
    synth_loss=0.1,
    g_adv_loss=0.1,
    g_hard_loss=0.1,
    g_reg_loss=0.1,
)

# Logging: the record keeper is given the "example_logs" and
# "example_tensorboard" folders, and the hook container wraps it.
record_keeper, _, _ = logging_presets.get_record_keeper(
    "example_logs",
    "example_tensorboard",
)
hooks = logging_presets.get_hook_container(record_keeper)
dataset_dict = dict(val=val_dataset)
model_folder = "example_saved_models"

# Tester whose results are reported through the logging hooks, plus the
# end-of-epoch hook built from it.
tester = testers.GlobalEmbeddingSpaceTester(
    end_of_testing_hook=hooks.end_of_testing_hook,
)
end_of_epoch_hook = hooks.end_of_epoch_hook(
    tester,
    dataset_dict,
    model_folder,
)
trainer = trainers.DeepAdversarialMetricLearning(
    models=models,
    optimizers=optimizers,
    batch_size=batch_size,
    loss_funcs=loss_funcs,
    mining_funcs=mining_funcs,
    iterations_per_epoch=iterations_per_epoch,
示例#22
0
def main():
    """Fine-tune an InceptionResnetV1 trunk with an ArcFace loss.

    Loads the ``train_aligned`` and ``val_aligned`` image folders under
    ``data_dir``, builds the model, optimizer, scheduler, logging hooks and
    tester, then runs a ``MetricLossOnly`` trainer for ``num_epochs`` epochs.

    Relies on names defined outside this function: ``data_dir``, ``LR``,
    ``batch_size``, ``num_epochs`` and ``fixed_image_standardization``.
    """
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Running on device: {}'.format(device))

    # Data transformations
    # Training images get the augmentation group below applied through
    # RandomApply, then are converted to float32, to a tensor, and
    # standardized.
    trans_train = transforms.Compose([
        transforms.RandomApply(transforms=[
            transforms.AutoAugment(transforms.AutoAugmentPolicy.IMAGENET),
            # transforms.RandomPerspective(distortion_scale=0.6, p=1.0),
            transforms.RandomRotation(degrees=(0, 180)),
            transforms.RandomHorizontalFlip(),
        ]),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization,
    ])

    # Validation images skip augmentation but get the same conversion and
    # standardization steps.
    trans_val = transforms.Compose([
        # transforms.CenterCrop(120),
        np.float32,
        transforms.ToTensor(),
        fixed_image_standardization,
    ])

    train_dataset = datasets.ImageFolder(os.path.join(data_dir,
                                                      "train_aligned"),
                                         transform=trans_train)
    val_dataset = datasets.ImageFolder(os.path.join(data_dir, "val_aligned"),
                                       transform=trans_val)

    # Prepare the model
    # classify=False: presumably the model then returns embeddings rather
    # than class logits - TODO confirm against the model's documentation.
    model = InceptionResnetV1(classify=False,
                              pretrained="vggface2",
                              dropout_prob=0.5).to(device)

    # for param in list(model.parameters())[:-8]:
    #     param.requires_grad = False

    trunk_optimizer = torch.optim.SGD(model.parameters(), lr=LR)

    # Set the loss function
    # First argument is the number of training classes; 512 is presumably the
    # embedding size produced by the model - TODO confirm.
    # NOTE(review): no optimizer is registered for the loss itself; if
    # ArcFaceLoss holds learnable parameters they would not be updated -
    # confirm whether a loss optimizer is needed.
    loss = losses.ArcFaceLoss(len(train_dataset.classes), 512)

    # Package the above stuff into dictionaries.
    models = {"trunk": model}
    optimizers = {"trunk_optimizer": trunk_optimizer}
    loss_funcs = {"metric_loss": loss}
    mining_funcs = {}
    lr_scheduler = {
        "trunk_scheduler_by_plateau":
        torch.optim.lr_scheduler.ReduceLROnPlateau(trunk_optimizer)
    }

    # Create the logging hooks used by the tester and trainer; the record
    # keeper is given the "logs" and "tensorboard" folders.
    record_keeper, _, _ = logging_presets.get_record_keeper(
        "logs", "tensorboard")
    hooks = logging_presets.get_hook_container(record_keeper)

    dataset_dict = {"val": val_dataset, "train": train_dataset}
    model_folder = "training_saved_models"

    # NOTE(review): this hook is defined but not passed to the tester in the
    # code visible here, so it is never called unless it is used further down.
    def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname,
                        *args):
        # Scatter-plot the 2-D UMAP embeddings, one color per label.
        logging.info("UMAP plot for the {} split and label set {}".format(
            split_name, keyname))
        label_set = np.unique(labels)
        num_classes = len(label_set)
        fig = plt.figure(figsize=(8, 7))
        plt.gca().set_prop_cycle(
            cycler("color", [
                plt.cm.nipy_spectral(i)
                for i in np.linspace(0, 0.9, num_classes)
            ]))
        for i in range(num_classes):
            idx = labels == label_set[i]
            plt.plot(umap_embeddings[idx, 0],
                     umap_embeddings[idx, 1],
                     ".",
                     markersize=1)
        plt.show()

    # Tester restricted to the mean_average_precision_at_r metric.
    tester = testers.GlobalEmbeddingSpaceTester(
        end_of_testing_hook=hooks.end_of_testing_hook,
        dataloader_num_workers=4,
        accuracy_calculator=AccuracyCalculator(
            include=['mean_average_precision_at_r'], k="max_bin_count"))

    # splits_to_eval pairs 'val' with ['train']; presumably val is the query
    # split and train the reference split - TODO confirm against the library.
    end_of_epoch_hook = hooks.end_of_epoch_hook(tester,
                                                dataset_dict,
                                                model_folder,
                                                splits_to_eval=[('val',
                                                                 ['train'])])

    # Create the trainer
    trainer = trainers.MetricLossOnly(
        models,
        optimizers,
        batch_size,
        loss_funcs,
        mining_funcs,
        train_dataset,
        lr_schedulers=lr_scheduler,
        dataloader_num_workers=8,
        end_of_iteration_hook=hooks.end_of_iteration_hook,
        end_of_epoch_hook=end_of_epoch_hook)

    trainer.train(num_epochs=num_epochs)