Example #1
    def _init_experiment(self):
        """
        init the experiment,
        which will visualize and log the performance of the model
        Returns:

        """

        # 1 - define tags
        tags = []
        if self.eval_train:
            self._init_experiment_tag(self.train_set, tags, "train")
        if self.val_set is not None:
            self._init_experiment_tag(self.val_set, tags, "val")

        # 2 - define experiment
        experiment = Experiment(name=self.config["name"],
                                desc=str(self.model),
                                hparams=self.config)

        # 3 - define metrics
        for name, metric in self.metrics.items():
            experiment.add_metric(Metric(name=name, tags=tags,
                                         vis_type="line"))

        # track the loss as well (added once, outside the loop)
        experiment.add_metric(Metric(name="loss", tags=tags, vis_type="line"))

        return experiment
Example #2
def bcn(config, data_file, embeddings, device, dataset, embeddings_type):
    # extensions: add 2 languages, use a combination of CoVe embeddings (like ELMo)

    name = "test_model"
    torch.manual_seed(123)

    inputs = data.Field(lower=True, include_lengths=True, batch_first=True)
    labels = data.Field(sequential=False, unk_token=None)

    print('Generating train, dev, test splits')

    if dataset == 'IWSLT':
        # using the IWSLT 2016 TED talk translation task
        train, dev, test = datasets.IWSLT.splits(root=data_file,
                                                 exts=['.en', '.de'],
                                                 fields=[inputs, inputs])
    elif dataset == 'SST-2':
        train, dev, test = datasets.SST.splits(
            text_field=inputs,
            label_field=labels,
            root=data_file,
            fine_grained=False,
            train_subtrees=True,
            filter_pred=lambda ex: ex.label != 'neutral')
    elif dataset == 'SST-5':
        train, dev, test = datasets.SST.splits(text_field=inputs,
                                               label_field=labels,
                                               root=data_file,
                                               fine_grained=True,
                                               train_subtrees=True)
    elif dataset == 'IMDB':
        train, test = datasets.IMDB.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file)
        train, dev = train.split(
            split_ratio=0.9,
            stratified=True)  # 0.9 in order to be close to the paper
    elif dataset == 'TREC-6':
        train, test = datasets.TREC.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file,
                                           fine_grained=False)
        train, dev = train.split(split_ratio=0.9, stratified=True)
    elif dataset == 'TREC-50':
        train, test = datasets.TREC.splits(text_field=inputs,
                                           label_field=labels,
                                           root=data_file,
                                           fine_grained=True)
        train, dev = train.split()
    elif dataset == 'SNLI':
        train, dev, test = datasets.SNLI.splits(text_field=inputs,
                                                label_field=labels,
                                                root=data_file)
    else:
        print('Invalid dataset name detected...')
        return

    print('Building vocabulary')
    inputs.build_vocab(train, dev, test)
    inputs.vocab.load_vectors(
        vectors=GloVe(name='840B', dim=300, cache=embeddings))

    labels.build_vocab(train, dev, test)

    train_iter, dev_iter, test_iter = data.BucketIterator.splits(
        (train, dev, test),
        batch_size=config["train_batch_size"],
        device=torch.device(device) if device >= 0 else None,
        sort_within_batch=True)

    model = BCN(config=config,
                n_vocab=len(inputs.vocab),
                vocabulary=inputs.vocab.vectors,
                embeddings=embeddings,
                num_labels=len(labels.vocab.freqs),
                embeddings_type=embeddings_type)

    bcn_params = [
        p for n, p in model.named_parameters()
        if "mtlstm" not in n and p.requires_grad
    ]

    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(bcn_params, lr=0.001)

    if device != -1:
        model.to(device)
    print(model)
    total_params = sum(p.numel() for p in model.parameters())
    total_trainable_params = sum(p.numel() for p in bcn_params
                                 if p.requires_grad)

    print("Total Params:", number_h(total_params))
    print("Total Trainable Params:", number_h(total_trainable_params))

    #####################################
    # Training Pipeline
    #####################################
    trainer = BCNTrainer(model=model,
                         train_loader=train_iter,
                         valid_loader=dev_iter,
                         criterion=criterion,
                         device="cpu" if device == -1 else 'cuda',
                         config=config,
                         optimizers=[optimizer])

    print('Generating CoVe')

    ####################################################################
    # Experiment: logging and visualizing the training process
    ####################################################################
    exp = Experiment(name, config, src_dirs=None, output_dir=EXP_DIR)
    exp.add_metric("ep_loss", "line", "epoch loss class", ["TRAIN", "VAL"])
    exp.add_metric("ep_f1", "line", "epoch f1", ["TRAIN", "VAL"])
    exp.add_metric("ep_acc", "line", "epoch accuracy", ["TRAIN", "VAL"])

    exp.add_value("epoch", title="epoch summary")
    exp.add_value("progress", title="training progress")

    ####################################################################
    # Training Loop
    ####################################################################
    best_loss = None
    early_stopping = EarlyStopping("min", config["patience"])

    for epoch in range(1, config["epochs"] + 1):
        train_loss = trainer.train_epoch()
        print(model.w, model.gama)
        val_loss, y, y_pred = trainer.eval_epoch()

        # Calculate accuracy and f1-macro on the evaluation set
        exp.update_metric("ep_loss", train_loss.item(), "TRAIN")
        exp.update_metric("ep_loss", val_loss.item(), "VAL")
        exp.update_metric("ep_f1", 0, "TRAIN")
        exp.update_metric("ep_f1", f1_macro(y, y_pred), "VAL")
        exp.update_metric("ep_acc", 0, "TRAIN")
        exp.update_metric("ep_acc", acc(y, y_pred), "VAL")

        print()
        epoch_log = exp.log_metrics(["ep_loss", "ep_f1", "ep_acc"])
        print(epoch_log)
        exp.update_value("epoch", epoch_log)

        # Save the model if the val loss is the best we've seen so far.
        if not best_loss or val_loss < best_loss:
            best_loss = val_loss
            trainer.best_acc = acc(y, y_pred)
            trainer.best_f1 = f1_macro(y, y_pred)
            trainer.checkpoint(name=name)

        if early_stopping.stop(val_loss):
            print("Early Stopping (according to cls loss)....")
            break

        print("\n" * 2)

    return best_loss, trainer.best_acc, trainer.best_f1
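A minimal call sketch for the bcn() runner above. The only config keys the function reads directly are "train_batch_size", "patience" and "epochs"; everything else in config is forwarded to BCN and BCNTrainer, so the values and paths below are illustrative assumptions rather than project defaults.

config = {
    "train_batch_size": 32,   # assumed value
    "patience": 5,            # assumed value
    "epochs": 20,             # assumed value
    # ... plus whatever hyperparameters BCN / BCNTrainer expect ...
}

best_loss, best_acc, best_f1 = bcn(config=config,
                                   data_file=".data",        # torchtext root dir (assumed path)
                                   embeddings="embeddings",  # GloVe cache dir (assumed path)
                                   device=0,                 # GPU id, or -1 to run on CPU
                                   dataset="SST-2",
                                   embeddings_type="CoVe")   # assumed identifier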
Example #3
def clf_features_baseline_runner(yaml,
                                 word2idx,
                                 idx2word,
                                 weights,
                                 cluster=False):
    if cluster is False:
        from logger.experiment import Experiment

    # torch.manual_seed(0)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

    opts, config = train_options(yaml)
    ####################################################################
    # Data Loading and Preprocessing
    ####################################################################
    X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(
        config["data"]["dataset"])

    # load word embeddings
    # if config["data"]["embeddings"] == "wiki.en.vec":
    #     word2idx, idx2word, weights = load_word_vectors_from_fasttext(
    #         os.path.join(EMB_DIR, config["data"]["embeddings"]),
    #         config["data"]["embeddings_dim"])
    # else:
    #     word2idx, idx2word, weights = load_word_vectors(
    #         os.path.join(EMB_DIR, config["data"]["embeddings"]),
    #         config["data"]["embeddings_dim"])

    ####################################################################
    # Linguistic Features Loading and Selection
    ####################################################################
    # Any features/lexicon should be in the form of a dictionary
    # For example: lex = {'word':[0., 1., ..., 0.]}

    # load affect features
    print("Loading linguistic features...")
    # todo: streamline feature loading pipeline
    features, feat_length = load_features(config["data"]["features"])
    # assert ... same len
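    # An illustrative (assumed) shape of what load_features returns: a mapping
    # from word to a fixed-length feature vector, plus the length of that
    # vector, e.g.
    #   features    = {"happy": [0.9, 0.1, 0.0], "sad": [0.1, 0.8, 0.2]}
    #   feat_length = 3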

    # build dataset
    print("Building training dataset...")
    train_set = ClfDataset(X_train,
                           y_train,
                           word2idx,
                           feat_length=feat_length,
                           features_dict=features)

    print("Building validation dataset...")
    val_set = ClfDataset(X_val,
                         y_val,
                         word2idx,
                         features_dict=features,
                         feat_length=feat_length)

    test_set = ClfDataset(X_test,
                          y_test,
                          word2idx,
                          features_dict=features,
                          feat_length=feat_length)
    train_set.truncate(500)
    val_set.truncate(100)

    src_lengths = [len(x) for x in train_set.data]
    val_lengths = [len(x) for x in val_set.data]
    test_lengths = [len(x) for x in test_set.data]

    # select sampler & dataloader
    train_sampler = BucketBatchSampler(src_lengths, config["batch_size"], True)
    val_sampler = SortedSampler(val_lengths)
    val_sampler_train = SortedSampler(src_lengths)
    test_sampler = SortedSampler(test_lengths)

    train_loader = DataLoader(train_set,
                              batch_sampler=train_sampler,
                              num_workers=opts.cores,
                              collate_fn=ClfCollate_withFeatures())
    val_loader = DataLoader(val_set,
                            sampler=val_sampler,
                            batch_size=config["batch_size"],
                            num_workers=opts.cores,
                            collate_fn=ClfCollate_withFeatures())
    val_loader_train_dataset = DataLoader(train_set,
                                          sampler=val_sampler_train,
                                          batch_size=config["batch_size"],
                                          num_workers=opts.cores,
                                          collate_fn=ClfCollate_withFeatures())
    test_loader = DataLoader(test_set,
                             sampler=test_sampler,
                             batch_size=config["batch_size"],
                             num_workers=opts.cores,
                             collate_fn=ClfCollate_withFeatures())

    ####################################################################
    # Model
    ####################################################################
    model = BaselineConcClassifier(ntokens=weights.shape[0],
                                   nclasses=len(set(train_set.labels)),
                                   feat_size=feat_length,
                                   **config["model"])
    model.word_embedding.embedding.weight = nn.Parameter(
        torch.from_numpy(weights), requires_grad=False)
    model.to(opts.device)
    print(model)

    ####################################################################
    # Count total parameters of model
    ####################################################################
    total_params = sum(p.numel() for p in model.parameters())
    total_trainable_params = sum(p.numel() for p in model.parameters()
                                 if p.requires_grad)

    print("Total Params:", number_h(total_params))
    print("Total Trainable Params:", number_h(total_trainable_params))

    if config["class_weights"]:
        class_weights = class_weigths(train_set.labels, to_pytorch=True)
        class_weights = class_weights.to(opts.device)

        criterion = nn.CrossEntropyLoss(weight=class_weights)
    else:
        criterion = nn.CrossEntropyLoss()

    clf_parameters = filter(lambda p: p.requires_grad, model.parameters())
    clf_optimizer = Adam(clf_parameters, weight_decay=1e-5)

    ####################################################################
    # Training Pipeline
    ####################################################################
    _outputs = []

    def batch_callback(i_batch, losses, batch_outputs):
        _outputs.append(batch_outputs)

        if trainer.step % config["log_interval"] == 0:
            outputs = list(zip(*_outputs))
            _losses = numpy.array(losses[-config["log_interval"]:]).mean(0)
            exp.update_metric("clf-loss", _losses)
            _y_hat = torch.cat(outputs[0]).max(-1)[1].cpu().data.numpy()
            _y = torch.cat(outputs[1]).cpu().data.numpy()
            f1 = f1_score(_y, _y_hat, average='macro')
            exp.update_metric("f1-train", f1)

            losses_log = exp.log_metrics(["clf-loss", 'f1-train'])
            exp.update_value("progress",
                             trainer.progress_log + "\n" + losses_log)

            # clean lines and move cursor back up N lines
            print("\n\033[K" + losses_log)
            print("\033[F" * (len(losses_log.split("\n")) + 2))

            _outputs.clear()

    # Trainer: responsible for managing the training process
    trainer = ClfTrainer_withFeatures(
        model=model,
        train_loader=train_loader,
        valid_loader=val_loader,
        valid_loader_train_set=val_loader_train_dataset,
        test_loader=test_loader,
        criterion=criterion,
        optimizers=clf_optimizer,
        config=config,
        device=opts.device,
        batch_end_callbacks=None)

    ####################################################################
    # Experiment: logging and visualizing the training process
    ####################################################################
    if cluster is False:
        exp = Experiment(opts.name,
                         config,
                         src_dirs=opts.source,
                         output_dir=EXP_DIR)

        exp.add_metric("ep_loss", "line", "epoch loss", ["TRAIN", "VAL"])
        exp.add_metric("ep_f1", "line", "epoch f1", ["TRAIN", "VAL"])
        exp.add_metric("ep_acc", "line", "epoch accuracy", ["TRAIN", "VAL"])
        exp.add_metric("ep_pre", "line", "epoch precision", ["TRAIN", "VAL"])
        exp.add_metric("ep_rec", "line", "epoch recall", ["TRAIN", "VAL"])

        exp.add_value("epoch", title="epoch summary", vis_type="text")
        exp.add_value("progress", title="training progress", vis_type="text")

    ####################################################################
    # Training Loop
    ####################################################################
    best_loss = None
    early_stopping = Early_stopping("min", config["patience"])

    for epoch in range(config["epochs"]):
        train_loss = trainer.train_epoch()
        val_loss, y, y_pred = trainer.eval_epoch(val_set=True)
        _, y_train, y_pred_train = trainer.eval_epoch(train_set=True)

        # Calculate accuracy and f1-macro on the evaluation set
        if cluster is False:
            exp.update_metric("ep_loss", train_loss.item(), "TRAIN")
            exp.update_metric("ep_loss", val_loss.item(), "VAL")

            exp.update_metric("ep_f1", f1_macro(y_train, y_pred_train),
                              "TRAIN")
            exp.update_metric("ep_f1", f1_macro(y, y_pred), "VAL")

            exp.update_metric("ep_acc", acc(y_train, y_pred_train), "TRAIN")
            exp.update_metric("ep_acc", acc(y, y_pred), "VAL")

            exp.update_metric("ep_pre", precision_macro(y_train, y_pred_train),
                              "TRAIN")
            exp.update_metric("ep_pre", precision_macro(y, y_pred), "VAL")

            exp.update_metric("ep_rec", recall_macro(y_train, y_pred_train),
                              "TRAIN")
            exp.update_metric("ep_rec", recall_macro(y, y_pred), "VAL")

            print()
            epoch_log = exp.log_metrics(
                ["ep_loss", "ep_f1", "ep_acc", "ep_pre", "ep_rec"])
            print(epoch_log)
            exp.update_value("epoch", epoch_log)

            exp.save()
        else:
            print("epoch: {}, train loss: {}, val loss: {}, f1: {}".format(
                epoch, train_loss.item(), val_loss.item(), f1_macro(y,
                                                                    y_pred)))

        # Save the model if the validation loss is the best we've seen so far.
        if not best_loss or val_loss < best_loss:
            best_loss = val_loss
            trainer.best_val_loss = best_loss
            trainer.acc = acc(y, y_pred)
            trainer.f1 = f1_macro(y, y_pred)
            trainer.precision = precision_macro(y, y_pred)
            trainer.recall = recall_macro(y, y_pred)

            trainer.checkpoint(name=opts.name, verbose=False)

        if early_stopping.stop(val_loss):
            print("Early Stopping...")
            break

        print("\n")

    # return trainer.best_val_loss, trainer.acc, trainer.f1, trainer.precision, trainer.recall
    #################
    # Test
    #################
    _, y_test_, y_test_predicted = trainer.eval_epoch(test_set=True)
    f1_test = f1_macro(y_test_, y_test_predicted)
    acc_test = acc(y_test_, y_test_predicted)
    print("#" * 33)
    print("F1 for test set: {}".format(f1_test))
    print("Accuracy for test set: {}".format(acc_test))
    print("#" * 33)

    return trainer.best_val_loss, trainer.acc, trainer.f1, trainer.precision, trainer.recall, f1_test, acc_test
Example #4
def sent_clf(dataset, config, opts, transfer=False):
    from logger.experiment import Experiment

    opts.name = config["name"]
    X_train, y_train, X_val, y_val = dataset
    vocab = None
    if transfer:
        opts.transfer = config["pretrained_lm"]
        checkpoint = load_checkpoint(opts.transfer)
        config["vocab"].update(checkpoint["config"]["vocab"])
        dict_pattern_rename(checkpoint["config"]["model"],
                            {"rnn_": "bottom_rnn_"})
        config["model"].update(checkpoint["config"]["model"])
        vocab = checkpoint["vocab"]

    ####################################################################
    # Load Preprocessed Datasets
    ####################################################################
    if config["preprocessor"] == "twitter":
        preprocessor = twitter_preprocessor()
    else:
        preprocessor = None

    print("Building training dataset...")
    train_set = ClfDataset(X_train,
                           y_train,
                           vocab=vocab,
                           preprocess=preprocessor,
                           vocab_size=config["vocab"]["size"],
                           seq_len=config["data"]["seq_len"])

    print("Building validation dataset...")
    val_set = ClfDataset(X_val,
                         y_val,
                         seq_len=train_set.seq_len,
                         preprocess=preprocessor,
                         vocab=train_set.vocab)

    src_lengths = [len(x) for x in train_set.data]
    val_lengths = [len(x) for x in val_set.data]

    # select sampler & dataloader
    train_sampler = BucketBatchSampler(src_lengths, config["batch_size"], True)
    val_sampler = SortedSampler(val_lengths)
    val_sampler_train = SortedSampler(src_lengths)

    train_loader = DataLoader(train_set,
                              batch_sampler=train_sampler,
                              num_workers=opts.cores,
                              collate_fn=ClfCollate())
    val_loader = DataLoader(val_set,
                            sampler=val_sampler,
                            batch_size=config["batch_size"],
                            num_workers=opts.cores,
                            collate_fn=ClfCollate())
    val_loader_train_dataset = DataLoader(train_set,
                                          sampler=val_sampler_train,
                                          batch_size=config["batch_size"],
                                          num_workers=opts.cores,
                                          collate_fn=ClfCollate())
    ####################################################################
    # Model
    ####################################################################
    ntokens = len(train_set.vocab)
    model = Classifier(ntokens, len(set(train_set.labels)), **config["model"])
    model.to(opts.device)

    clf_criterion = nn.CrossEntropyLoss()
    lm_criterion = nn.CrossEntropyLoss(ignore_index=0)

    embed_parameters = filter(lambda p: p.requires_grad,
                              model.embed.parameters())
    bottom_parameters = filter(
        lambda p: p.requires_grad,
        chain(model.bottom_rnn.parameters(), model.vocab.parameters()))
    if config["model"]["has_att"]:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(), model.attention.parameters(),
                  model.classes.parameters()))
    else:
        top_parameters = filter(
            lambda p: p.requires_grad,
            chain(model.top_rnn.parameters(), model.classes.parameters()))

    embed_optimizer = optim.ASGD(embed_parameters, lr=0.0001)
    rnn_optimizer = optim.ASGD(bottom_parameters)
    top_optimizer = Adam(top_parameters, lr=config["top_lr"])
    ####################################################################
    # Training Pipeline
    ####################################################################

    # Trainer: responsible for managing the training process
    trainer = SentClfTrainer(model,
                             train_loader,
                             val_loader, (lm_criterion, clf_criterion),
                             [embed_optimizer, rnn_optimizer, top_optimizer],
                             config,
                             opts.device,
                             valid_loader_train_set=val_loader_train_dataset,
                             unfreeze_embed=config["unfreeze_embed"],
                             unfreeze_rnn=config["unfreeze_rnn"])

    ####################################################################
    # Experiment: logging and visualizing the training process
    ####################################################################

    exp = Experiment(opts.name,
                     config,
                     src_dirs=opts.source,
                     output_dir=EXP_DIR)
    exp.add_metric("ep_loss_lm", "line", "epoch loss lm", ["TRAIN", "VAL"])
    exp.add_metric("ep_loss_cls", "line", "epoch loss class", ["TRAIN", "VAL"])
    exp.add_metric("ep_f1", "line", "epoch f1", ["TRAIN", "VAL"])
    exp.add_metric("ep_acc", "line", "epoch accuracy", ["TRAIN", "VAL"])

    exp.add_value("epoch", title="epoch summary")
    exp.add_value("progress", title="training progress")

    ####################################################################
    # Resume Training from a previous checkpoint
    ####################################################################
    if transfer:
        print("Transferring Encoder weights ...")
        dict_pattern_rename(checkpoint["model"], {
            "encoder": "bottom_rnn",
            "decoder": "vocab"
        })
        load_state_dict_subset(model, checkpoint["model"])
    print(model)

    ####################################################################
    # Training Loop
    ####################################################################
    best_loss = None
    early_stopping = EarlyStopping("min", config["patience"])

    for epoch in range(0, config["epochs"]):

        train_loss = trainer.train_epoch()
        val_loss, y, y_pred = trainer.eval_epoch(val_set=True)
        _, y_train, y_pred_train = trainer.eval_epoch(train_set=True)
        exp.update_metric("ep_loss_lm", train_loss[0], "TRAIN")
        exp.update_metric("ep_loss_lm", val_loss[0], "VAL")

        exp.update_metric("ep_loss_cls", train_loss[1], "TRAIN")
        exp.update_metric("ep_loss_cls", val_loss[1], "VAL")

        exp.update_metric("ep_f1", f1_macro(y_train, y_pred_train), "TRAIN")
        exp.update_metric("ep_f1", f1_macro(y, y_pred), "VAL")

        exp.update_metric("ep_acc", acc(y_train, y_pred_train), "TRAIN")
        exp.update_metric("ep_acc", acc(y, y_pred), "VAL")

        print()
        epoch_log = exp.log_metrics(
            ["ep_loss_lm", "ep_loss_cls", "ep_f1", "ep_acc"])
        print(epoch_log)
        exp.update_value("epoch", epoch_log)

        # Save the model if the val loss is the best we've seen so far.
        if not best_loss or val_loss[1] < best_loss:
            best_loss = val_loss[1]
            trainer.best_acc = acc(y, y_pred)
            trainer.best_f1 = f1_macro(y, y_pred)
            trainer.checkpoint(name=opts.name, timestamp=True)

        if early_stopping.stop(val_loss[1]):
            print("Early Stopping (according to classification loss)....")
            break

        print("\n" * 2)

    return best_loss, trainer.best_acc, trainer.best_f1
Example #5
    def __init__(self,
                 model,
                 loaders,
                 optimizer,
                 pipeline,
                 config,
                 task="clf",
                 use_exp=False,
                 inspect_weights=False,
                 metrics=None,
                 eval_train=True,
                 checkpoint=None,
                 early_stopping=None):
        """
         The Trainer is responsible for training a model.
         It holds a set of variables that help us abstract
         the training process.

        Args:
            use_exp (bool): if True, use the integrated experiment
                manager. In order to utilize the visualizations provided
                by the experiment manager you should:
                    - run `python -m visdom.server` in a terminal.
                    - access visdom by going to http://localhost:8097

                    https://github.com/facebookresearch/visdom#usage

            model (nn.Module): the pytorch model
            optimizer ():
            pipeline (callable): a callback function, which defines the training
                pipeline. It must return 4 things (outputs, labels, attentions, loss):
                    - outputs: the outputs (predictions) of the model
                    - labels: the gold labels
                    - attentions: the attention scores of the model
                    - loss: the loss

            config (): the config instance with the hyperparams of the model
            task (string): you can choose between {"clf", "reg"},
                for classification and regression respectively.
            metrics (dict): a dictionary with the metrics that will be used
                for evaluating the performance of the model.
                - key: string with the name of the metric.
                - value: a callable, with arguments (y, y_hat) that returns a
                    score.
            eval_train (bool): if True, evaluate the performance of the model
                on the training dataset at the end of each epoch.
            early_stopping (EarlyStop):
            checkpoint (Checkpoint):
        """
        self.use_exp = use_exp
        self.inspect_weights = inspect_weights
        self.model = model
        self.task = task
        self.config = config
        self.eval_train = eval_train
        self.loaders = loaders
        self.optimizer = optimizer
        self.pipeline = pipeline
        self.checkpoint = checkpoint
        self.early_stopping = early_stopping
        self.metrics = {} if metrics is None else metrics

        self.running_loss = 0.0
        self.epoch = 0

        ########################################################
        # Initializations
        ########################################################
        _dataset_names = list(self.loaders.keys())
        _metric_names = list(self.metrics.keys()) + ["loss"]

        # init watched metrics
        self.scores = {
            metric: {d: []
                     for d in _dataset_names}
            for metric in _metric_names
        }

        if self.checkpoint is not None:
            self.checkpoint.scores = self.scores
        if self.early_stopping is not None:
            self.early_stopping.scores = self.scores

        # init Experiment
        if use_exp:
            self.experiment = Experiment(name=self.config["name"],
                                         desc=str(self.model),
                                         hparams=self.config)

            for metric in _metric_names:
                self.experiment.add_metric(
                    Metric(name=metric, tags=_dataset_names, vis_type="line"))

            if self.inspect_weights:
                self.inspector = Inspector(model, ["std", "mean"])
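The docstring above defines two call contracts: pipeline is invoked once per batch and must return the model outputs, the gold labels, the attention scores and the loss, while metrics maps a metric name to a callable of the form (y, y_hat) -> score. Below is a minimal usage sketch under those contracts, assuming the enclosing class is the Trainer shown in the next example, that model, train_loader, val_loader, optimizer and config already exist, and that each batch carries (inputs, labels, lengths); the pipeline body is purely illustrative.

import torch.nn.functional as F
from sklearn.metrics import accuracy_score, f1_score

def clf_pipeline(model, batch):
    # unpack the batch the way the project's collate function is assumed to build it
    inputs, labels, lengths = batch
    outputs = model(inputs, lengths)
    loss = F.cross_entropy(outputs, labels)
    return outputs, labels, None, loss  # no attention scores in this sketch

metrics = {
    "acc": accuracy_score,
    "f1": lambda y, y_hat: f1_score(y, y_hat, average="macro"),
}

trainer = Trainer(model=model,
                  loaders={"train": train_loader, "val": val_loader},
                  optimizer=optimizer,
                  pipeline=clf_pipeline,
                  config=config,
                  metrics=metrics,
                  eval_train=True,
                  use_exp=False)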
Example #6
class Trainer:
    def __init__(self,
                 model,
                 loaders,
                 optimizer,
                 pipeline,
                 config,
                 task="clf",
                 use_exp=False,
                 inspect_weights=False,
                 metrics=None,
                 eval_train=True,
                 checkpoint=None,
                 early_stopping=None):
        """
         The Trainer is responsible for training a model.
         It holds a set of variables that help us abstract
         the training process.

        Args:
            use_exp (bool): if True, use the integrated experiment
                manager. In order to utilize the visualizations provided
                by the experiment manager you should:
                    - run `python -m visdom.server` in a terminal.
                    - access visdom by going to http://localhost:8097

                    https://github.com/facebookresearch/visdom#usage

            model (nn.Module): the pytorch model
            optimizer ():
            pipeline (callable): a callback function, which defines the training
                pipeline. It must return 4 things (outputs, labels, attentions, loss):
                    - outputs: the outputs (predictions) of the model
                    - labels: the gold labels
                    - attentions: the attention scores of the model
                    - loss: the loss

            config (): the config instance with the hyperparams of the model
            task (string): you can choose between {"clf", "reg"},
                for classification and regression respectively.
            metrics (dict): a dictionary with the metrics that will be used
                for evaluating the performance of the model.
                - key: string with the name of the metric.
                - value: a callable, with arguments (y, y_hat) that returns a
                    score.
            eval_train (bool): if True, evaluate the performance of the model
                on the training dataset at the end of each epoch.
            early_stopping (EarlyStop):
            checkpoint (Checkpoint):
        """
        self.use_exp = use_exp
        self.inspect_weights = inspect_weights
        self.model = model
        self.task = task
        self.config = config
        self.eval_train = eval_train
        self.loaders = loaders
        self.optimizer = optimizer
        self.pipeline = pipeline
        self.checkpoint = checkpoint
        self.early_stopping = early_stopping
        self.metrics = {} if metrics is None else metrics

        self.running_loss = 0.0
        self.epoch = 0

        ########################################################
        # Initializations
        ########################################################
        _dataset_names = list(self.loaders.keys())
        _metric_names = list(self.metrics.keys()) + ["loss"]

        # init watched metrics
        self.scores = {
            metric: {d: []
                     for d in _dataset_names}
            for metric in _metric_names
        }

        if self.checkpoint is not None:
            self.checkpoint.scores = self.scores
        if self.early_stopping is not None:
            self.early_stopping.scores = self.scores

        # init Experiment
        if use_exp:
            self.experiment = Experiment(name=self.config["name"],
                                         desc=str(self.model),
                                         hparams=self.config)

            for metric in _metric_names:
                self.experiment.add_metric(
                    Metric(name=metric, tags=_dataset_names, vis_type="line"))

            if self.inspect_weights:
                self.inspector = Inspector(model, ["std", "mean"])

    def __on_after_train(self):
        pass

    def __on_after_eval(self):

        # 4 - update the corresponding values in the experiment
        if self.use_exp:
            for score_name, score in self.scores.items():
                for tag, value in score.items():
                    self.experiment.metrics[score_name].append(tag, value[-1])

            self.experiment.update_plots()
            if self.inspect_weights:
                self.inspector.update_state(self.model)

    def train_loader(self, loader):
        """
        Run a pass of the model on a given dataloader
        Args:
            loader ():

        Returns:

        """
        # switch to train mode -> enable regularization layers, such as Dropout
        self.model.train()

        running_loss = 0.0
        for i_batch, sample_batched in enumerate(loader, 1):
            # 1 - zero the gradients
            self.optimizer.zero_grad()

            # 2 - compute loss using the provided pipeline
            outputs, labels, attentions, loss = self.pipeline(
                self.model, sample_batched)

            # 3 - backward pass: compute gradient wrt model parameters
            loss.backward()

            # just to be sure... clip gradients with norm > N.
            # apply it only if the model has an RNN in it.
            if len([
                    m for m in self.model.modules()
                    if hasattr(m, 'bidirectional')
            ]) > 0:
                clip_grad_norm_(self.model.parameters(),
                                self.config["clip_norm"])

            # 4 - update weights
            self.optimizer.step()

            running_loss += loss.item()

            # print statistics
            epoch_progress(loss=loss.item(),
                           epoch=self.epoch,
                           batch=i_batch,
                           batch_size=loader.batch_size,
                           dataset_size=len(loader.dataset))
        return running_loss

    def train(self):
        """
        Train the model for one epoch (on one or more dataloaders)
        Returns:

        """
        self.epoch += 1
        self.train_loader(self.loaders["train"])
        print()
        self.__on_after_train()

    def eval_loader(self, loader):
        """
        Evaluate the model on a given dataloader and return the average loss,
        the (gold, predicted) labels, the posteriors and the attention scores
        Args:
            loader ():

        Returns:

        """
        # 1 - evaluate the dataloader
        avg_loss, (y, y_pred), posteriors, attentions = predict(
            self.model, self.pipeline, loader, self.task, "eval")

        return avg_loss, (y, y_pred), posteriors, attentions

    def eval(self):
        """
        Evaluate the model on each dataset and update the corresponding metrics.
        The function is normally called at the end of each epoch.
        Returns:

        """

        for k, v in self.loaders.items():
            avg_loss, (y, y_pred), _, _ = self.eval_loader(v)

            scores = self.__calc_scores(y, y_pred)

            self.__log_scores(scores, avg_loss, k)

            scores["loss"] = avg_loss

            for name, value in scores.items():
                self.scores[name][k].append(value)

        self.__on_after_eval()

    def __calc_scores(self, y, y_pred):
        return {
            name: metric(y, y_pred)
            for name, metric in self.metrics.items()
        }

    def __log_scores(self, scores, loss, tag):
        """
        Log the scores of a dataset (tag) on the console
        Args:
            scores (): a dictionary of (metric_name, value)
            loss (): the loss of the model on an epoch
            tag (): the dataset (name)

        Returns:

        """
        print("\t{:6s} - ".format(tag), end=" ")
        for name, value in scores.items():
            print(name, '{:.4f}'.format(value), end=", ")
        print(" Loss:{:.4f}".format(loss))

    def log_training(self, name, desc):

        results = {}
        scores = {k: v for k, v in self.scores.items() if k != "loss"}

        results["name"] = name
        results["desc"] = desc
        results["scores"] = scores

        path = os.path.join(EXPS_PATH, self.config["name"])

        ####################################
        # JSON
        ####################################
        json_file = path + ".json"
        try:
            with open(json_file) as f:
                data = json.load(f)
        except (FileNotFoundError, json.JSONDecodeError):
            data = []

        data.append(results)

        with open(json_file, 'w') as f:
            json.dump(data, f)

        ####################################
        # CSV
        ####################################
        _results = []
        for result in data:
            _result = {k: v for k, v in result.items() if k != "scores"}
            for score_name, score in result["scores"].items():
                for tag, values in score.items():
                    _result["_".join([score_name, tag, "min"])] = min(values)
                    _result["_".join([score_name, tag, "max"])] = max(values)
            _results.append(_result)

        with open(path + ".csv", 'w') as f:
            pandas.DataFrame(_results).to_csv(f, sep=',', encoding='utf-8')
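The Trainer above leaves the outer epoch loop to the caller: train() runs one pass over loaders["train"], eval() scores every registered loader and appends to trainer.scores, and log_training() dumps the accumulated scores to JSON and CSV. A minimal driver sketch, assuming a configured trainer, a loader registered under the key "val", a config with an "epochs" entry, and an early-stopping object exposing stop(value) as in the other examples (how the Trainer's own early_stopping hook is meant to be driven is not shown here, so this is an assumption).

for epoch in range(config["epochs"]):
    trainer.train()   # one training pass over loaders["train"]
    trainer.eval()    # evaluate every loader and append to trainer.scores

    # trainer.scores[metric][loader_name] accumulates one value per epoch
    val_loss = trainer.scores["loss"]["val"][-1]
    if trainer.early_stopping is not None and trainer.early_stopping.stop(val_loss):
        print("Early Stopping...")
        break

trainer.log_training(name=config["name"], desc=str(trainer.model))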
Example #7
                         losses_log)

        # clean lines and move cursor back up N lines
        print("\n\033[K" + losses_log)
        print("\033[F" * (len(losses_log.split("\n")) + 2))


# Trainer: responsible for managing the training process
trainer = LMTrainer(model, train_loader, val_loader, loss_function,
                    [optimizer], config, opts.device,
                    batch_end_callbacks=[batch_callback])

####################################################################
# Experiment: logging and visualizing the training process
####################################################################
exp = Experiment(opts.name, config, src_dirs=opts.source, output_dir=EXP_DIR)
exp.add_metric("loss", "line")
exp.add_metric("ppl", "line", "perplexity")
exp.add_metric("ep_loss", "line", "epoch loss", ["TRAIN", "VAL"])
exp.add_metric("ep_ppl", "line", "epoch perplexity", ["TRAIN", "VAL"])
exp.add_value("progress", title="training progress")
exp.add_value("epoch", title="epoch summary")

####################################################################
# Training Loop
####################################################################
best_loss = None
for epoch in range(config["epochs"]):
    train_loss = trainer.train_epoch()
    val_loss = trainer.eval_epoch()
    exp.update_metric("ep_loss", train_loss, "TRAIN")