Example #1
    def save_aucs(self, summary_path, AUCs_path, **kwargs):

        summaries = summaryProcessor.get_summaries_from_path(summary_path)

        # Create the output directory if needed; exist_ok=True avoids a race
        # between checking for the directory and creating it.
        Path(AUCs_path).mkdir(parents=True, exist_ok=True)

        for _, summary in summaries.df.iterrows():

            filename = summary.training_output_path.split("/")[-1]
            utils.set_random_seed(summary.seed)
            data_processor = DataProcessor(summary=summary)
            data_loader = self.__get_data_loader(summary)

            auc_params = self.model_evaluator.get_aucs(
                summary=summary,
                AUCs_path=AUCs_path,
                filename=filename,
                data_processor=data_processor,
                data_loader=data_loader,
                **kwargs)
            if auc_params is None:
                continue

            (aucs, auc_path, append, write_header) = auc_params
            self.__save_aucs_to_csv(aucs=aucs,
                                    path=auc_path,
                                    append=append,
                                    write_header=write_header)
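
Every example in this listing delegates reproducibility to a set_random_seed helper whose implementation is not shown. A minimal sketch of such a helper, assuming it needs to cover Python's random module, NumPy, and PyTorch, might look like this:

import random

import numpy as np
import torch

def set_random_seed(seed):
    # Seed every RNG the surrounding code may draw from, so repeated
    # runs with the same seed reproduce the same results.
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    # Seeds all CUDA devices; a no-op on CPU-only machines.
    torch.cuda.manual_seed_all(seed)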
Example #2
    def get_qcd_test_data(self, summary, normalize=False):
        utils.set_random_seed(summary.seed)
        data_processor = DataProcessor(summary=summary)
        data_loader = self.__get_data_loader(summary)
        return self.model_evaluator.get_qcd_test_data(summary,
                                                      data_processor,
                                                      data_loader,
                                                      normalize=normalize)
Example #3
    def __init__(
        self,
        # general settings of the training
        model_trainer_path,
        validation_data_fraction,
        test_data_fraction,
        include_hlf,
        include_efp,
        hlf_to_drop,
        # arguments that will be passed to the specialized trainer class
        **training_settings):
        """
        Constructor of the general Trainer class, which will delegate architecture-specific tasks to
        a specialized Trainer class.
        """

        # Import correct specialized class
        self.model_class = utils.import_class(model_trainer_path)

        # Save general training arguments
        self.validation_data_fraction = validation_data_fraction
        self.test_data_fraction = test_data_fraction
        self.include_hlf = include_hlf
        self.include_efp = include_efp
        self.hlf_to_drop = hlf_to_drop

        # Draw, set and save random seed
        self.seed = np.random.randint(0, 99999999)
        utils.set_random_seed(self.seed)

        # Save training output path (used to save the model later on)
        self.training_output_path = training_settings["training_output_path"]

        # Prepare data processor and data loader for the specialized class
        data_processor = DataProcessor(
            validation_fraction=validation_data_fraction,
            test_fraction=test_data_fraction,
            seed=self.seed)

        data_loader = DataLoader()
        data_loader.set_params(include_hlf=include_hlf,
                               include_eflow=include_efp,
                               hlf_to_drop=hlf_to_drop)

        # Initialize specialized trainer object
        self.model_trainer = self.model_class(data_processor=data_processor,
                                              data_loader=data_loader,
                                              **training_settings)
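
Example #3 resolves its specialized trainer class at runtime through utils.import_class(model_trainer_path). That helper is not included here; a plausible sketch, assuming model_trainer_path is a dotted "package.module.ClassName" string, is:

import importlib

def import_class(class_path):
    # Split "package.module.ClassName" into its module path and class
    # name, import the module, and return the class object itself.
    module_path, _, class_name = class_path.rpartition(".")
    module = importlib.import_module(module_path)
    return getattr(module, class_name)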
Example #4
    def __init__(
        self,
        args,
        train_dataloader,
        valid_dataloader,
        model,
        device=torch.device("cpu"),
        patience=5,
    ):
        self.args = args
        self.device = device

        # data
        self.num_class = train_dataloader.dataset.num_classes
        self.train_dataloader = train_dataloader
        self.valid_dataloader = valid_dataloader

        # model
        self.model = model
        self.model.to(self.device)

        # optimizer
        self.set_optimizer()

        # metric
        self.set_metrics()
        self.best_val = 0.0
        self.best_train = 0.0

        # early stopping
        self.patience = patience
        self.es_count = self.patience

        # save dir (makedirs also creates missing parent directories)
        self.save_path = os.path.join(self.args.model_dir, "model_best.pkl")
        os.makedirs(self.args.model_dir, exist_ok=True)

        # tensorboardX writer
        self.writer = SummaryWriter(os.path.join(self.args.model_dir, "train_logs"))

        # set random seed
        set_random_seed(self.args.random_seed)
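
The constructor above only initializes the early-stopping state (patience, es_count, best_val); the loop that consumes it is not shown. A hypothetical epoch loop consistent with those fields, with train_one_epoch and validate as assumed helper methods, could look like:

    def fit(self, max_epochs):
        for epoch in range(max_epochs):
            self.train_one_epoch(epoch)       # assumed helper
            val_score = self.validate(epoch)  # assumed helper
            if val_score > self.best_val:
                # Improvement: save the model and reset the countdown.
                self.best_val = val_score
                self.es_count = self.patience
                torch.save(self.model.state_dict(), self.save_path)
            else:
                # No improvement: stop once patience is exhausted.
                self.es_count -= 1
                if self.es_count == 0:
                    break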
Example #5
    def draw_roc_curves(self, summary_path, summary_version, **kwargs):

        summaries = summaryProcessor.get_summaries_from_path(summary_path)

        plotting_args = {
            k: v
            for k, v in kwargs.items()
            if k not in ["signals", "signals_base_path"]
        }

        fig, ax_begin, ax_end, plt_end, colors = self.__get_plot_params(
            n_plots=1, **plotting_args)
        ax = ax_begin(0)

        for _, summary in summaries.df.iterrows():
            version = summaryProcessor.get_version(summary.summary_path)
            if version != summary_version:
                continue

            utils.set_random_seed(summary.seed)
            kwargs["filename"] = summary.training_output_path.split("/")[-1]
            data_processor = DataProcessor(summary=summary)
            data_loader = self.__get_data_loader(summary)

            self.model_evaluator.draw_roc_curves(summary=summary,
                                                 data_processor=data_processor,
                                                 data_loader=data_loader,
                                                 ax=ax,
                                                 colors=colors,
                                                 **kwargs)

        # Diagonal reference line corresponding to a random classifier
        x = np.linspace(0, 1, 11)
        ax.plot(x, x, '--', c='black')
        ax_end("false positive rate", "true positive rate")
        plt_end()
        plt.show()
Example #6
    parser.add_argument("target_domain", type=str, help="Target domain.")
    parser.add_argument("output_path",
                        type=str,
                        help="Output path to store prediction.")
    parser.add_argument("--batch_size",
                        type=int,
                        default=32,
                        help="Batch size.")
    parser.add_argument("--random_seed",
                        type=int,
                        default=42,
                        help="random seed.")

    args = parser.parse_args()

    set_random_seed(args.random_seed)
    dataset = DigitTestDataset(args.images_dir)
    dataloader = DataLoader(dataset,
                            batch_size=args.batch_size,
                            shuffle=False,
                            num_workers=8)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Model dir
    if args.target_domain == "svhn":
        model_dir = os.path.join("models", "dsn_m2s", "model_best.pth.tar")
    else:
        model_dir = os.path.join("models", "dsn_s2m", "model_best.pth.tar")

    # Models
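
Note that seeding the main process does not by itself make a num_workers=8 loader fully deterministic; shuffle=False sidesteps ordering here, but a shuffled training loader would also need a seeded generator and a worker_init_fn. A sketch following the standard PyTorch reproducibility recipe (dataset as defined above):

import random

import numpy as np
import torch
from torch.utils.data import DataLoader

def seed_worker(worker_id):
    # Derive per-worker seeds from the base seed torch assigns to each worker.
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

g = torch.Generator()
g.manual_seed(args.random_seed)
train_loader = DataLoader(dataset,
                          batch_size=args.batch_size,
                          shuffle=True,
                          num_workers=8,
                          worker_init_fn=seed_worker,
                          generator=g)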
Example #7
import random
import argparse

import numpy as np
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from sklearn.manifold import TSNE

from module.utils import set_random_seed
from module.dataset.digit import DigitDataset
from module.da.dann import DANN
from module.dsn.dsn import DSN

set_random_seed(42)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def plot_tsne(
    source_features_ls,
    source_labels_ls,
    source_domains_ls,
    target_features_ls,
    target_labels_ls,
    target_domains_ls,
    fname,
    sample_size=250,
):
    source_features_ls = np.concatenate(source_features_ls)
    source_labels_ls = np.concatenate(source_labels_ls)
Example #8
    fig_1_path = os.path.join(args.output_dir, "fig1_2.jpg")
    fig_2_path = os.path.join(args.output_dir, "fig2_2.jpg")

    dcgen = DCGen(latent_dim=128)
    dcgen.load_state_dict(torch.load(args.dcgan_model))
    dcgen.to(device)
    acgen = ACGen(latent_dim=120,
                  n_feature_maps=128,
                  embedding_dim=4,
                  n_attributes=1)
    acgen.load_state_dict(torch.load(args.acgan_model))
    acgen.to(device)

    # prepare dc noise
    set_random_seed(int(args.random_seed[0]))
    # dc_noise = torch.randn(64, 128, 1, 1, device=device)
    # dc_noise[1], dc_noise[2], dc_noise[5] = dc_noise[42], dc_noise[50], dc_noise[54]
    # dc_noise[9], dc_noise[18], dc_noise[28] = dc_noise[60], dc_noise[32], dc_noise[34]
    # dc_noise[27] = dc_noise[63]
    # dc_noise = dc_noise[:32]

    # tmp_dc_noise = dc_noise.detach().cpu().numpy()
    # with open("models/dcgan/fixed_noise.pkl", "wb") as fout:
    #     pickle.dump(tmp_dc_noise, fout)

    with open(os.path.join("models", "dcgan", "fixed_noise.pkl"), "rb") as fin:
        dc_noise = pickle.load(fin)
    dc_noise = torch.tensor(dc_noise).float()
    dc_noise = dc_noise.to(device)
Example #9
    def get_error(self, input_data, summary, scaler=None):
        utils.set_random_seed(summary.seed)
        data_processor = DataProcessor(summary=summary)
        return self.model_evaluator.get_error(input_data, summary,
                                              data_processor, scaler)
Example #10
    def get_signal_test_data(self, name, path, summary):
        utils.set_random_seed(summary.seed)
        data_processor = DataProcessor(summary=summary)
        data_loader = self.__get_data_loader(summary)
        return self.model_evaluator.get_signal_test_data(
            name, path, summary, data_processor, data_loader)
Example #11
    def __init__(
        self,
        qcd_path,
        training_params,
        bottleneck_size,
        intermediate_architecture=(30, 30),
        test_data_fraction=0.15,
        validation_data_fraction=0.15,
        norm_type="",
        norm_args=None,
        hlf_to_drop=None,
    ):
        """
        Creates auto-encoder trainer with random seed, provided training parameters and architecture.
        Loads specified data, splits them into training, validation and test samples according to
        provided arguments. Normalizes the data as specified by norm_percentile.
        High-level features specified in hlf_to_drop will not be used for training.
        """

        if hlf_to_drop is None:
            hlf_to_drop = ['Energy', 'Flavor']

        self.seed = np.random.randint(0, 99999999)
        utils.set_random_seed(self.seed)

        self.qcd_path = qcd_path
        self.hlf_to_drop = hlf_to_drop

        self.training_params = training_params
        self.test_data_fraction = test_data_fraction
        self.validation_data_fraction = validation_data_fraction

        data_loader = DataLoader()

        # Load QCD samples
        (self.qcd, qcd_jets, qcd_event,
         qcd_flavor) = data_loader.load_all_data(qcd_path,
                                                 "qcd background",
                                                 include_hlf=True,
                                                 include_eflow=True,
                                                 hlf_to_drop=hlf_to_drop)

        data_processor = DataProcessor(
            validation_fraction=self.validation_data_fraction,
            test_fraction=self.test_data_fraction,
            seed=self.seed)

        (train_data, validation_data, test_data, _,
         _) = data_processor.split_to_train_validate_test(data_table=self.qcd)

        train_data.output_file_prefix = "qcd training data"
        validation_data.output_file_prefix = "qcd validation data"

        # Normalize the input
        self.norm_type = norm_type
        self.norm_args = norm_args

        self.data_ranges = np.asarray([])

        self.means_train, self.stds_train = None, None
        self.means_validation, self.stds_validation = None, None

        if norm_type == "Custom":
            self.data_ranges = utils.percentile_normalization_ranges(
                train_data, norm_args["norm_percentile"])
        elif norm_type == "CustomStandard":
            self.means_train, self.stds_train = train_data.get_means_and_stds()
            self.means_validation, self.stds_validation = \
                validation_data.get_means_and_stds()

        print("Trainer scaler args: ", self.norm_args)

        self.train_data_normalized = data_processor.normalize(
            data_table=train_data,
            normalization_type=self.norm_type,
            norm_args=self.norm_args,
            data_ranges=self.data_ranges,
            means=self.means_train,
            stds=self.stds_train)

        self.validation_data_normalized = data_processor.normalize(
            data_table=validation_data,
            normalization_type=self.norm_type,
            norm_args=self.norm_args,
            data_ranges=self.data_ranges,
            means=self.means_validation,
            stds=self.stds_validation)

        # Build the model
        self.input_size = len(self.qcd.columns)
        self.intermediate_architecture = intermediate_architecture
        self.bottleneck_size = bottleneck_size
        self.model = self.get_auto_encoder_model()
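
The "Custom" branch above relies on utils.percentile_normalization_ranges, whose body is not part of this listing. A minimal sketch, assuming it returns a per-feature (low, high) range clipped at the given percentile, might be:

import numpy as np

def percentile_normalization_ranges(data_table, norm_percentile):
    # Hypothetical implementation: take the norm_percentile-th and
    # (100 - norm_percentile)-th percentiles of each feature column
    # as its normalization range.
    values = np.asarray(data_table)
    low = np.percentile(values, norm_percentile, axis=0)
    high = np.percentile(values, 100 - norm_percentile, axis=0)
    return np.stack([low, high], axis=-1)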