Exemplo n.º 1
0
def get_baseline_experiment(experiment_name):
    """Build the baseline HAN experiment and restore its best checkpoint.

    Args:
        experiment_name: Forwarded as the first positional argument of
            ``Experiment`` (presumably the experiment directory; confirm
            against the ``Experiment`` signature in use).

    Returns:
        The ``Experiment`` instance, after ``load_checkpoint('best')`` has
        been called on it.
    """
    # The network is put in eval mode as soon as it is constructed.
    network = HAN(20, 10, 300, 2, nb_layers, .25).eval()

    experiment_options = dict(
        monitor_metric="val_fscore_macro",
        monitor_mode="max",
        loss_function='cross_entropy',
        task="classification",
        epoch_metrics=[FBeta(average='macro')],
        device=0,
    )
    experiment = Experiment(experiment_name, network, **experiment_options)
    experiment.load_checkpoint('best')

    return experiment
Exemplo n.º 2
0
 def _retrain(
     self,
     experiment: Experiment,
     train_generator: DatasetContainer,
     valid_generator: DatasetContainer,
     epochs: int,
     seed: int,
     callbacks: List,
     disable_tensorboard: bool,
     capturing_context: bool,
 ) -> List[Dict]:
     # pylint: disable=too-many-arguments
     """
     Run ``experiment.train`` on the given generators, optionally inside a ``Capturing`` context.

     Args:
         experiment: Object whose ``train`` method is called.
         train_generator: Training data, passed positionally to ``train``.
         valid_generator: Validation data, passed as ``valid_generator``.
         epochs: Passed as ``epochs``.
         seed: Passed as ``seed``.
         callbacks: Passed as ``callbacks``.
         disable_tensorboard: Passed as ``disable_tensorboard``.
         capturing_context: If true, training runs inside ``Capturing()``; otherwise inside a null context.

     Return:
         Whatever ``experiment.train`` returns.
     """
     # With Poutyne 1.7 and earlier, the caller asks for the output to be captured because
     # Poutyne prints some exceptions. Otherwise a no-op context manager is used.
     if capturing_context:
         output_guard = Capturing()
     else:
         output_guard = contextlib.nullcontext()

     with output_guard:
         training_options = {
             "valid_generator": valid_generator,
             "epochs": epochs,
             "seed": seed,
             "callbacks": callbacks,
             "verbose": self.verbose,
             "disable_tensorboard": disable_tensorboard,
         }
         training_history = experiment.train(train_generator, **training_options)
     return training_history
Exemplo n.º 3
0
def get_proposed_hmc_experiment(experiment_name):
    """Build the multi-level, multi-label HAN experiment and restore its best checkpoint.

    Args:
        experiment_name: Forwarded as the first positional argument of
            ``Experiment`` (presumably the experiment directory; confirm
            against the ``Experiment`` signature in use).

    Returns:
        The ``Experiment`` instance, after ``load_checkpoint('best')`` has
        been called on it.
    """
    # The network is put in eval mode as soon as it is constructed.
    network = MLHAN_MultiLabel(20, 10, 300, [3, 2], nb_layers, .25).eval()

    experiment_options = dict(
        monitor_metric="val_fscore_macro",
        monitor_mode="max",
        loss_function=MultiLevelMultiLabelLoss(),
        # One macro-averaged F-beta metric per hierarchy level.
        epoch_metrics=[
            FBetaLowerLevelMultiLabel(average='macro'),
            FBetaUpperLevelMultiLabel(average='macro'),
        ],
        device=0,
    )
    experiment = Experiment(experiment_name, network, **experiment_options)
    experiment.load_checkpoint('best')

    return experiment
def main(cfg: DictConfig) -> None:
    """Train a network on CIFAR-10 with Poutyne, then evaluate it.

    The function reads the following entries from ``cfg``:
    ``setting.seed``, ``MlFlow`` (instantiated as the logging callback),
    ``data_loader.batch_size``, ``model``, ``optimizer``, ``device``,
    ``poutyne.root_logging_directory``, ``poutyne.logging`` and
    ``trainer.num_epochs``.

    Note:
        The CIFAR-10 ``train=False`` split is used both as the validation
        generator during training and as the test generator afterwards.

    Args:
        cfg: Hydra/OmegaConf configuration holding the entries listed above.
    """
    log.info("Init of the training")
    seed = cfg.setting.seed
    set_seeds(seed)

    # cfg.MlFlow.params["root_path"] = hydra.utils.get_original_cwd()
    writer_callback = instantiate(cfg.MlFlow)
    writer_callback.log_config_params(cfg)

    log.info("Loading of the dataset and embedding model")
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    # Build the dataset root with os.path so that it is valid on every platform.
    # The former literal "..\..\data" relied on the invalid escape sequence "\."
    # and only denoted "two directories up" on Windows.
    data_root = os.path.join(os.pardir, os.pardir, "data")
    batch_size = cfg.data_loader.batch_size

    train = DataLoader(data.CIFAR10(root=data_root, train=True, download=True, transform=transform),
                       batch_size=batch_size)
    val = DataLoader(data.CIFAR10(root=data_root, train=False, download=True, transform=transform),
                     batch_size=batch_size)

    network = instantiate(cfg.model)

    optimizer = instantiate(cfg.optimizer, network.parameters())
    loss = nn.CrossEntropyLoss()

    # Checkpoints and logs go under <original cwd>/<root logging dir>/<experiment id>/<run id>.
    saving_directory = os.path.join(hydra.utils.get_original_cwd(), cfg.poutyne.root_logging_directory,
                                    writer_callback.experiment_id,
                                    writer_callback.run_id)

    monitored_metric = "val_acc"
    monitor_mode = "max"

    experiment = Experiment(directory=saving_directory, network=network, device=cfg.device, optimizer=optimizer,
                            loss_function=loss, batch_metrics=["acc"], epoch_metrics=[F1()],
                            logging=cfg.poutyne.logging, monitor_metric=monitored_metric, monitor_mode=monitor_mode)

    log.info("Start of the training")
    experiment.train(train_generator=train, valid_generator=val, epochs=cfg.trainer.num_epochs,
                     seed=seed, callbacks=[writer_callback])

    log.info("Start of the testing of the trained model")
    test_result = experiment.test(test_generator=val, seed=seed)

    # The callback hook is invoked by hand so the test metrics reach the logger.
    writer_callback.on_test_end(test_result)
Exemplo n.º 5
0
    def test(self,
             test_dataset_container: DatasetContainer,
             batch_size: int,
             num_workers: int = 1,
             callbacks: Union[List, None] = None,
             seed: int = 42,
             logging_path: str = "./chekpoints",
             checkpoint: Union[str, int] = "best") -> Dict:
        # pylint: disable=too-many-arguments
        """
        Evaluate a retrained or pre-trained model on a dataset that uses the same tags. The evaluation is run
        through an `experiment <https://poutyne.org/experiment.html>`_ from the
        `poutyne <https://poutyne.org/index.html>`_ framework. The experiment module saves checkpoints ``ckpt``
        (pickle format) and a log.tsv where the best epochs can be found (the best epoch is used in test).

        Args:
            test_dataset_container (~deepparse.deepparse.dataset_container.dataset_container.DatasetContainer):
                The test dataset container of the data to use.
            batch_size (int): The batch size given to the test data loader.
            num_workers (int): Number of workers given to the test data loader (default is 1 worker).
            callbacks (Union[List, None]): List of callbacks to use during the test.
                See Poutyne `callback <https://poutyne.org/callbacks.html#callback-class>`_ for more information.
                By default we set no callback.
            seed (int): Seed to use (by default 42).
            logging_path (str): The logging path for the checkpoints. By default the path is ``./chekpoints``.
            checkpoint (Union[str, int]): Checkpoint to use for the test. The value goes through
                ``handle_checkpoint`` before being handed to the experiment.
                - If 'best', will load the best weights.
                - If 'last', will load the last model checkpoint.
                - If int, will load a specific checkpoint (e.g. 3).
                - If 'str', will load a specific model (e.g. a retrained model), must be a path to a pickled format
                    model i.e. ends with a '.p' extension (e.g. retrained_model.p).
                - If 'fasttext', will load our pre-trained fasttext model and test it on your data.
                    (Need to have Poutyne>=1.2 to work)
                - If 'bpemb', will load our pre-trained bpemb model and test it on your data.
                    (Need to have Poutyne>=1.2 to work)
        Return:
            A dictionary with the test stats (see `Experiment class
            <https://poutyne.org/experiment.html#poutyne.Experiment.train>`_ for details).

        Raises:
            ValueError: If the model type is ``fasttext-light``.

        Note:
            We use NLL loss and accuracy as in the `article <https://arxiv.org/abs/2006.16152>`_.

        Example:

            .. code-block:: python

                    address_parser = AddressParser(device=0) #on gpu device 0
                    data_path = 'path_to_a_pickle_test_dataset.p'

                    test_container = PickleDatasetContainer(data_path)

                    address_parser.test(test_container, batch_size=128) # using the default best epoch
                    address_parser.test(test_container, batch_size=128, checkpoint='last') # using the last epoch
                    address_parser.test(test_container, batch_size=128, checkpoint=5) # using the epoch 5 model
        """
        if self.model_type == "fasttext-light":
            raise ValueError(
                "It's not possible to test a fasttext-light due to pymagnitude problem."
            )

        if callbacks is None:
            callbacks = []

        # The data transformer supplies the collate function that batches the raw examples.
        transformer = self._set_data_transformer()
        loader_options = {
            "collate_fn": transformer.output_transform,
            "batch_size": batch_size,
            "num_workers": num_workers,
        }
        test_loader = DataLoader(test_dataset_container, **loader_options)

        experiment = Experiment(logging_path,
                                self.model,
                                device=self.device,
                                loss_function=nll_loss,
                                batch_metrics=[accuracy])

        resolved_checkpoint = handle_checkpoint(checkpoint)

        return experiment.test(test_loader,
                               seed=seed,
                               callbacks=callbacks,
                               checkpoint=resolved_checkpoint)
Exemplo n.º 6
0
    def retrain(self,
                dataset_container: DatasetContainer,
                train_ratio: float,
                batch_size: int,
                epochs: int,
                num_workers: int = 1,
                learning_rate: float = 0.01,
                callbacks: Union[List, None] = None,
                seed: int = 42,
                logging_path: str = "./chekpoints") -> List[Dict]:
        # pylint: disable=too-many-arguments, line-too-long, too-many-locals
        """
        Retrain the address parser model on a dataset that uses the same tags. Training is run through an
        `experiment <https://poutyne.org/experiment.html>`_ from the `poutyne <https://poutyne.org/index.html>`_
        framework. The experiment module saves checkpoints ``ckpt`` (pickle format) and a log.tsv where
        the best epochs can be found (the best epoch is used in test).

        Args:
            dataset_container (~deepparse.deepparse.dataset_container.dataset_container.DatasetContainer): The
                dataset container of the data to use.
            train_ratio (float): The share of the dataset used for training. The rest of the data is used for
                validation (e.g. a train ratio of 0.8 means an 80-20 train-valid split).
            batch_size (int): The size of the batch.
            epochs (int): Number of training epochs.
            num_workers (int): Number of workers to use for the data loader (default is 1 worker).
            learning_rate (float): The learning rate (LR) to use for training (default 0.01). To reduce the LR during
                training, use `Poutyne learning rate scheduler callback
                <https://github.com/GRAAL-Research/poutyne/blob/master/poutyne/framework/callbacks/lr_scheduler.py>`_.
            callbacks (Union[List, None]): List of callbacks to use during training.
                See Poutyne `callback <https://poutyne.org/callbacks.html#callback-class>`_ for more information.
                By default we set no callback.
            seed (int): Seed to use (by default 42).
            logging_path (str): The logging path for the checkpoints. By default the path is ``./chekpoints``.

        Return:
            A list of dictionaries with the epoch stats (see `Experiment class
            <https://poutyne.org/experiment.html#poutyne.Experiment.train>`_ for details).

        Raises:
            ValueError: If the model type is ``fasttext-light``.

        Note:
            We use the SGD optimizer, NLL loss and accuracy as a metric; the data is shuffled and teacher forcing is
            used during training (with a prob of 0.5) as in the `article <https://arxiv.org/abs/2006.16152>`_.

        Note:
            Due to pymagnitude, we could not train using the Magnitude embeddings, meaning it's not possible to
            train using the fasttext-light model. But, since we don't update the embeddings weights, one can retrain
            using the fasttext model and later on use the weights with the fasttext-light.

        Example:

            .. code-block:: python

                    address_parser = AddressParser(device=0) #on gpu device 0
                    data_path = 'path_to_a_pickle_dataset.p'

                    container = PickleDatasetContainer(data_path)

                    address_parser.retrain(container, 0.8, epochs=1, batch_size=128)

            Using learning rate scheduler callback.

            .. code-block:: python

                    import poutyne

                    address_parser = AddressParser(device=0)
                    data_path = 'path_to_a_pickle_dataset.p'

                    container = PickleDatasetContainer(data_path)

                    lr_scheduler = poutyne.StepLR(step_size=1, gamma=0.1) # reduce LR by a factor of 10 each epoch
                    address_parser.retrain(container, 0.8, epochs=5, batch_size=128, callbacks=[lr_scheduler])

        See `this <https://github.com/GRAAL-Research/deepparse/blob/master/examples/fine_tuning.py>`_ for a fine
        tuning example.
        """
        if self.model_type == "fasttext-light":
            raise ValueError(
                "It's not possible to retrain a fasttext-light due to pymagnitude problem."
            )

        if callbacks is None:
            callbacks = []

        # Split the container into a training loader and a validation loader.
        generators = self._create_training_data_generator(
            dataset_container, train_ratio, batch_size, num_workers, seed=seed)
        train_loader, valid_loader = generators

        sgd_optimizer = SGD(self.model.parameters(), learning_rate)

        experiment = Experiment(logging_path,
                                self.model,
                                device=self.device,
                                optimizer=sgd_optimizer,
                                loss_function=nll_loss,
                                batch_metrics=[accuracy])

        return experiment.train(train_loader,
                                valid_generator=valid_loader,
                                epochs=epochs,
                                seed=seed,
                                callbacks=callbacks)
Exemplo n.º 7
0
def launch(dataset, experiment_name, network, hidden_size, hidden_layers, sample_size, weight_decay, prior,\
           learning_rate, lr_patience, optim_algo, epochs, batch_size, valid_size, pre_epochs, stop_early,\
           gpu_device, random_seed, logging):
    """Run one complete experiment: data loading, optional pre-training, training and testing.

    Args:
        dataset: Dataset identifier given to ``DatasetLoader.load`` and used in the logging path.
        experiment_name: Name used in the logging path and stored in ``setting.json``.
        network: One of ``'pbgnet'``, ``'pbcombinet'``, ``'pbgnet_ll'``, ``'pbcombinet_ll'`` or
            ``'baseline'``; selects the architecture, the loss and the monitored metric.
        hidden_size: Width of each hidden layer (layers are ``hidden_layers * [hidden_size]``).
        hidden_layers: Number of hidden layers.
        sample_size: Passed to the ``PBGNet`` constructor.
        weight_decay: Weight decay of the main optimizer.
        prior: ``'zero'``, ``'init'`` or ``'pretrain'``; controls weight initialisation, how the
            validation split is used and whether priors are set from the initial weights.
        learning_rate: Learning rate of the pre-training and main optimizers.
        lr_patience: ``patience`` of the ``ReduceLROnPlateau`` scheduler.
        optim_algo: ``"sgd"`` (momentum 0.9) or ``"adam"``.
        epochs: Number of training epochs.
        batch_size: Batch size of every data loader built here.
        valid_size: ``test_size`` given to ``train_test_split`` to carve out the validation split.
        pre_epochs: Number of pre-training epochs (only used when ``prior == 'pretrain'``).
        stop_early: ``patience`` of ``EarlyStopping``; the callback is only registered when ``> 0``.
        gpu_device: CUDA device index; ``-1`` (or no CUDA available) keeps ``device`` at ``None``.
        random_seed: Seed for ``check_random_state``, ``torch.manual_seed`` and the experiments.
        logging: When truthy, ``setting.json`` and ``done.txt`` are written; the value is also
            forwarded to the ``Experiment`` constructors.

    Returns:
        None. Results are written to files under the logging path.

    NOTE(review): ``net`` is only bound for the five network names above and ``optimizer`` only
    for ``"sgd"``/``"adam"``; any other value ends in a NameError further down.
    """

    # Setting random seed for reproducibility
    random_state = check_random_state(random_seed)
    torch.manual_seed(random_seed)

    # Pac-Bayes Bound parameters
    delta = 0.05
    # Grid of C values; only consumed by the '*_ll' branch below.
    C_range = torch.Tensor(np.arange(0.1, 20.0, 0.01))

    # Setting GPU device
    device = None
    if torch.cuda.is_available() and gpu_device != -1:
        torch.cuda.set_device(gpu_device)
        device = torch.device('cuda:%d' % gpu_device)
        print("Running on GPU %d" % gpu_device)
    else:
        print("Running on CPU")

    # Logging
    experiment_setting = dict([('experiment_name', experiment_name),
                               ('dataset', dataset), ('network', network),
                               ('hidden_size', hidden_size),
                               ('hidden_layers', hidden_layers),
                               ('sample_size', sample_size),
                               ('epochs', epochs),
                               ('weight_decay', weight_decay),
                               ('prior', prior),
                               ('learning_rate', learning_rate),
                               ('lr_patience', lr_patience),
                               ('optim_algo', optim_algo),
                               ('batch_size', batch_size),
                               ('valid_size', valid_size),
                               ('pre_epochs', pre_epochs),
                               ('stop_early', stop_early),
                               ('random_seed', random_seed)])

    directory_name = get_logging_dir_name(experiment_setting)

    logging_path = join(RESULTS_PATH, experiment_name, dataset, directory_name)
    if logging:
        if not exists(logging_path): makedirs(logging_path)
        with open(join(logging_path, "setting.json"), 'w') as out_file:
            json.dump(experiment_setting, out_file, sort_keys=True, indent=4)

    # Loading dataset
    dataset_loader = DatasetLoader(random_state=random_state)
    X_train, X_test, y_train, y_test = dataset_loader.load(dataset)
    X_train, X_valid, y_train, y_valid = train_test_split(
        X_train, y_train, test_size=valid_size, random_state=random_state)

    # Experiment
    # Defaults; the PAC-Bayes branches below override some of them.
    batch_metrics = [accuracy]
    epoch_metrics = []
    save_every_epoch = False
    cost_function = linear_loss
    monitor_metric = 'val_loss'
    # 'val' -> a validation loader is built; 'train'/'pretrain' -> no validation loader.
    valid_set_use = 'val'
    callbacks = []

    if network in ['pbgnet', 'pbcombinet']:
        print("### Using Pac-Bayes Binary Gradient Network ###")
        if prior in ['zero', 'init']:
            # The validation split is merged back into the training set.
            valid_set_use = 'train'
            X_train = np.vstack([X_train, X_valid])
            y_train = np.vstack([y_train, y_valid])
        elif prior == 'pretrain':
            # The validation split is reserved for pre-training (see below).
            valid_set_use = 'pretrain'

        if network == 'pbgnet':
            net = PBGNet(X_train.shape[1], hidden_layers * [hidden_size],
                         X_train.shape[0], sample_size, delta)
        else:
            net = PBCombiNet(X_train.shape[1], hidden_layers * [hidden_size],
                             X_train.shape[0], delta)
        # These networks are trained on, and monitored by, the bound itself.
        monitor_metric = 'bound'
        cost_function = net.bound
        epoch_metrics.append(
            MasterMetricLogger(network=net,
                               loss_function=linear_loss,
                               delta=delta,
                               n_examples=X_train.shape[0]))

    elif network in ['pbgnet_ll', 'pbcombinet_ll']:
        print(
            "### Using PAC-Bayes Gradient Network Architecture and Optimizing Linear Loss ###"
        )
        if network == 'pbgnet_ll':
            net = PBGNet(X_train.shape[1], hidden_layers * [hidden_size],
                         X_train.shape[0], sample_size, delta)
        else:
            net = PBCombiNet(X_train.shape[1], hidden_layers * [hidden_size],
                             X_train.shape[0], delta)
        epoch_metrics.append(
            MasterMetricLogger(network=net,
                               loss_function=linear_loss,
                               delta=delta,
                               n_examples=X_train.shape[0],
                               C_range=C_range.to(device)))
        # Extra checkpoint keyed on the bound, in addition to the experiment's own
        # checkpoint on the monitored metric (val_loss in this branch).
        callbacks.append(
            ModelCheckpoint(join(logging_path, 'bound_checkpoint_epoch.ckpt'),
                            temporary_filename=join(
                                logging_path,
                                'bound_checkpoint_epoch.tmp.ckpt'),
                            monitor='bound',
                            mode='min',
                            save_best_only=True))
    elif network == "baseline":
        print("### Running the Baseline Network with Tanh activations ###")
        net = BaselineNet(X_train.shape[1], hidden_layers * [hidden_size],
                          torch.nn.Tanh)

    if network.startswith('pb'):
        epoch_metrics.append(MetricLogger(network=net, key='bound'))
        epoch_metrics.append(MetricLogger(network=net, key='kl'))
        epoch_metrics.append(MetricLogger(network=net, key='C'))

    # Parameters initialization
    if prior in ['zero', 'init']:
        net.init_weights()

    elif prior == 'pretrain':
        print("### Pre-training network ###")
        # NOTE(review): every network other than 'pbgnet' (including 'pbgnet_ll' and
        # 'baseline') gets a PBCombiNet pre-network here - confirm this is intended.
        if network == 'pbgnet':
            pre_net = PBGNet(X_valid.shape[1], hidden_layers * [hidden_size],
                             X_valid.shape[0], sample_size, delta)
        else:
            pre_net = PBCombiNet(X_valid.shape[1],
                                 hidden_layers * [hidden_size],
                                 X_valid.shape[0], delta)

        pre_net.init_weights()
        pre_optimizer = torch.optim.Adam(pre_net.parameters(),
                                         lr=learning_rate,
                                         weight_decay=0.0)
        pre_logging_path = join(logging_path, 'pretrain')
        # NOTE(review): this directory is created even when `logging` is falsy.
        if not exists(pre_logging_path): makedirs(pre_logging_path)

        pretrain = Experiment(directory=pre_logging_path,
                              network=pre_net,
                              optimizer=pre_optimizer,
                              loss_function=linear_loss,
                              monitor_metric='loss',
                              device=device,
                              logging=logging,
                              batch_metrics=[accuracy])

        # Pre-training uses the validation split as its training data.
        pretrain_loader = DataLoader(TensorDataset(torch.Tensor(X_valid),
                                                   torch.Tensor(y_valid)),
                                     batch_size,
                                     shuffle=True)

        pretrain.train(train_generator=pretrain_loader,
                       valid_generator=None,
                       epochs=pre_epochs,
                       save_every_epoch=False,
                       disable_tensorboard=True,
                       seed=random_seed)

        # Pick the pre-training epoch with the lowest training loss from the TSV log
        # and load its checkpoint into the main network.
        history = pd.read_csv(pretrain.log_filename, sep='\t')
        best_epoch_index = history['loss'].idxmin()
        best_epoch_stats = history.iloc[best_epoch_index:best_epoch_index + 1]
        best_epoch = best_epoch_stats['epoch'].item()
        ckpt_filename = pretrain.best_checkpoint_filename.format(
            epoch=best_epoch)
        weights = torch.load(ckpt_filename, map_location='cpu')

        # strict=False: keys that do not match between the two networks are ignored.
        net.load_state_dict(weights, strict=False)

    print("### Training ###")

    # Setting prior
    # The prior is the network's current state (initial or pre-trained weights).
    if network.startswith('pb') and prior in ['init', 'pretrain']:
        net.set_priors(net.state_dict())

    # Adding early stopping and lr scheduler
    reduce_lr = ReduceLROnPlateau(monitor=monitor_metric, mode='min', patience=lr_patience, factor=0.5, \
                                  threshold_mode='abs', threshold=1e-4, verbose=True)
    lr_schedulers = [reduce_lr]

    early_stopping = EarlyStopping(monitor=monitor_metric,
                                   mode='min',
                                   min_delta=1e-4,
                                   patience=stop_early,
                                   verbose=True)
    # Early stopping is built unconditionally but only registered for a positive patience.
    if stop_early > 0:
        callbacks.append(early_stopping)

    # Initializing optimizer
    if optim_algo == "sgd":
        optimizer = torch.optim.SGD(net.parameters(),
                                    lr=learning_rate,
                                    momentum=0.9,
                                    weight_decay=weight_decay)
    elif optim_algo == "adam":
        optimizer = torch.optim.Adam(net.parameters(),
                                     lr=learning_rate,
                                     weight_decay=weight_decay)

    # Creating Poutyne experiment
    expt = Experiment(directory=logging_path,
                      network=net,
                      optimizer=optimizer,
                      loss_function=cost_function,
                      monitor_metric=monitor_metric,
                      device=device,
                      logging=logging,
                      batch_metrics=batch_metrics,
                      epoch_metrics=epoch_metrics)

    # Initializing data loaders
    train_loader = DataLoader(TensorDataset(torch.Tensor(X_train),
                                            torch.Tensor(y_train)),
                              batch_size,
                              shuffle=True)
    # A validation loader exists only when the validation split was not consumed
    # by training ('train') or pre-training ('pretrain').
    valid_loader = None
    if valid_set_use == 'val':
        valid_loader = DataLoader(
            TensorDataset(torch.Tensor(X_valid), torch.Tensor(y_valid)),
            batch_size)

    # Launching training
    expt.train(train_generator=train_loader,
               valid_generator=valid_loader,
               epochs=epochs,
               callbacks=callbacks,
               lr_schedulers=lr_schedulers,
               save_every_epoch=save_every_epoch,
               disable_tensorboard=True,
               seed=random_seed)

    print("### Testing ###")
    # Factory producing a module that applies torch.sign; handed to BaselineNet as
    # its activation to build the binary (sign-activated) counterpart network.
    sign_act_fct = lambda: Lambda(lambda x: torch.sign(x))
    test_loader = DataLoader(
        TensorDataset(torch.Tensor(X_test), torch.Tensor(y_test)), batch_size)

    if network == 'baseline':
        expt.test(test_generator=test_loader,
                  checkpoint='best',
                  seed=random_seed)
        # Binary network testing (sign activation)
        best_epoch = expt.get_best_epoch_stats()['epoch'].item()
        ckpt_filename = expt.best_checkpoint_filename.format(epoch=best_epoch)
        binary_net = BaselineNet(X_test.shape[1],
                                 hidden_layers * [hidden_size], sign_act_fct)
        weights = torch.load(ckpt_filename, map_location='cpu')
        binary_net.load_state_dict(weights, strict=False)
        binary_model = Model(binary_net,
                             'sgd',
                             linear_loss,
                             batch_metrics=[accuracy])
        test_loss, test_accuracy = binary_model.evaluate_generator(test_loader,
                                                                   steps=None)
        # Append the binary-network results and linear-loss aliases to the test log
        # written by expt.test above.
        test_stats = pd.read_csv(expt.test_log_filename.format(name='test'),
                                 sep='\t')
        test_stats['bin_test_linear_loss'] = test_loss
        test_stats['bin_test_accuracy'] = test_accuracy
        test_stats['linear_loss'] = test_stats['loss']
        test_stats['val_linear_loss'] = test_stats['val_loss']
        test_stats['test_linear_loss'] = test_stats['test_loss']
        test_stats.to_csv(expt.test_log_filename.format(name='test'),
                          sep='\t',
                          index=False)

    def pbgnet_testing(target_metric, irrelevant_columns, n_repetitions=20):
        """Restore the epoch minimising ``target_metric`` and write its test statistics.

        Args:
            target_metric: Column of the training log whose minimum selects the epoch.
            irrelevant_columns: Columns dropped from the training log (the log file is rewritten).
            n_repetitions: Number of repeated evaluations on the train and test loaders.
        """
        print(f"Restoring best model according to {target_metric}")

        # Cleaning logs
        history = pd.read_csv(expt.log_filename,
                              sep='\t').drop(irrelevant_columns,
                                             axis=1,
                                             errors='ignore')
        history.to_csv(expt.log_filename, sep='\t', index=False)

        # Loading best weights
        best_epoch_index = history[target_metric].idxmin()
        best_epoch_stats = history.iloc[best_epoch_index:best_epoch_index +
                                        1].reset_index(drop=True)
        best_epoch = best_epoch_stats['epoch'].item()
        print(f"Found best checkpoint at epoch: {best_epoch}")
        ckpt_filename = expt.best_checkpoint_filename.format(epoch=best_epoch)
        # For the '*_ll' networks the bound-selected weights live in the separate
        # ModelCheckpoint file registered before training.
        if network in ['pbgnet_ll', 'pbcombinet_ll'
                       ] and target_metric == 'bound':
            ckpt_filename = join(logging_path, 'bound_checkpoint_epoch.ckpt')
        weights = torch.load(ckpt_filename, map_location='cpu')

        # Binary network testing (sign activation)
        binary_net = BaselineNet(X_test.shape[1],
                                 hidden_layers * [hidden_size], sign_act_fct)
        # Remap 'layers.<i>.<rest>' to 'layers.<2*i>.<rest>'; other keys are dropped.
        # Presumably BaselineNet interleaves activation modules between its linear
        # layers, hence the doubled index - TODO confirm against BaselineNet.
        updated_weights = {}
        for name, weight in weights.items():
            if name.startswith('layers'):
                name = name.split('.', 2)
                name[1] = str(2 * int(name[1]))
                name = '.'.join(name)
                updated_weights[name] = weight

        binary_net.load_state_dict(updated_weights, strict=False)
        binary_model = Model(binary_net,
                             'sgd',
                             linear_loss,
                             batch_metrics=[accuracy])
        test_loss, test_accuracy = binary_model.evaluate_generator(test_loader,
                                                                   steps=None)

        best_epoch_stats['bin_test_linear_loss'] = test_loss
        best_epoch_stats['bin_test_accuracy'] = test_accuracy

        model = expt.model
        model.load_weights(ckpt_filename)

        def repeat_inference(loader, prefix='', drop_keys=[], n_times=20):
            """Evaluate ``model`` on ``loader`` ``n_times`` and return one DataFrame row per run.

            Columns are ``prefix + 'loss'`` followed by the prefixed ``model.metrics_names``;
            columns listed in ``drop_keys`` are removed when present.
            NOTE(review): ``drop_keys`` has a mutable default; it is not mutated here.
            """
            metrics_names = [prefix + 'loss'] + [
                prefix + metric_name for metric_name in model.metrics_names
            ]
            metrics_list = []

            for _ in range(n_times):
                loss, metrics = model.evaluate_generator(loader, steps=None)
                # A single metric may come back as a scalar; wrap it so it can be concatenated.
                if not isinstance(metrics, np.ndarray):
                    metrics = np.array([metrics])
                metrics_list.append(np.concatenate(([loss], metrics)))
            # Transpose: from one array per run to one list per metric.
            metrics_list = [list(e) for e in zip(*metrics_list)]
            metrics_stats = pd.DataFrame(
                {col: val
                 for col, val in zip(metrics_names, metrics_list)})
            return metrics_stats.drop(drop_keys, axis=1, errors='ignore')

        metrics_stats = repeat_inference(train_loader, n_times=n_repetitions)

        metrics_stats = metrics_stats.join(
            repeat_inference(test_loader,
                             prefix='test_',
                             drop_keys=['test_bound', 'test_kl', 'test_C'],
                             n_times=n_repetitions))

        # Drop from the logged epoch stats any column that was just re-measured, then
        # repeat the remaining stats on every row so the join lines up.
        best_epoch_stats = best_epoch_stats.drop(metrics_stats.keys().tolist(),
                                                 axis=1,
                                                 errors='ignore')
        metrics_stats = metrics_stats.join(
            pd.concat([best_epoch_stats] * n_repetitions, ignore_index=True))

        log_filename = expt.test_log_filename.format(name='test')
        if network in ['pbgnet_ll', 'pbcombinet_ll'
                       ] and target_metric == 'bound':
            log_filename = join(logging_path, 'bound_test_log.tsv')
        metrics_stats.to_csv(log_filename, sep='\t', index=False)

    # The '*_ll' networks are tested twice (best val_loss, then best bound); the
    # bound-trained networks once. The pbcombinet variants use a single repetition.
    default_irrelevant_columns = ['val_bound', 'val_kl', 'val_C']
    if network == 'pbgnet_ll':
        pbgnet_testing(target_metric='val_loss',
                       irrelevant_columns=default_irrelevant_columns,
                       n_repetitions=20)
        pbgnet_testing(target_metric='bound',
                       irrelevant_columns=default_irrelevant_columns,
                       n_repetitions=20)

    elif network == 'pbgnet':
        pbgnet_testing(
            target_metric='bound',
            irrelevant_columns=['val_loss', 'val_accuracy', 'val_linear_loss'
                                ] + default_irrelevant_columns,
            n_repetitions=20)

    elif network == 'pbcombinet_ll':
        pbgnet_testing(target_metric='val_loss',
                       irrelevant_columns=default_irrelevant_columns,
                       n_repetitions=1)
        pbgnet_testing(target_metric='bound',
                       irrelevant_columns=default_irrelevant_columns,
                       n_repetitions=1)

    elif network == 'pbcombinet':
        pbgnet_testing(
            target_metric='bound',
            irrelevant_columns=['val_loss', 'val_accuracy', 'val_linear_loss'
                                ] + default_irrelevant_columns,
            n_repetitions=1)
    # Marker file written once the whole run has completed.
    if logging:
        with open(join(logging_path, 'done.txt'), 'w') as done_file:
            done_file.write("done")

    print("### DONE ###")
Exemplo n.º 8
0
def main():
    """Train a ``Retrofit`` network on a synonym/antonym graph and evaluate it.

    Steps performed, in order:

    1. Load the word2vec vectors listed first in ``config.pretrained_embs``.
    2. Build the synonym/antonym dataset, its weights and the
       train/valid/test generators.
    3. Train the network through an ``Experiment`` with an SGD optimizer that
       uses one parameter group for the embedding layer and one for every
       other parameter.
    4. Print the confusion-matrix counts obtained on the test generator and
       run the learning visualizer.
    5. Reload the best checkpoint and print ``evaluation`` for every file of
       ``config.evaluations_file``, first with the trained embedding layer and
       then with a frozen embedding built from freshly reloaded vectors.
    """
    vec_model = KeyedVectors.load_word2vec_format(config.pretrained_embs[0],
                                                  limit=500000)
    print("Breakpoint 1")

    x = load_anto_syn_graph(config.synonyms_graph[0],
                            config.antonyms_graph[0],
                            vec_model,
                            neg_sample=config.nb_false)

    weight = compute_weight(x)

    print("Breakpoint 2")
    train_generator, valid_generator, test_generator = prepare_generator_graph(
        x)
    print("Breakpoint 3")
    # Fall back to the CPU when no GPU is available.
    device = torch.device(
        'cuda:%d' % config.device if torch.cuda.is_available() else 'cpu')

    network = Retrofit(vec_model, weight)

    # Split the parameters so the embedding layer gets its own optimizer options.
    embeddings_param_set = set(network.embedding.parameters())
    other_params_list = [
        p for p in network.parameters() if p not in embeddings_param_set
    ]
    optimizer = optim.SGD([{
        'params': other_params_list,
        **config.optimizer_other_params
    }, {
        'params': network.embedding.parameters(),
        **config.optimizer_embeddings_params
    }])

    #scheduler = LambdaLR(lr_lambda=[lambda_lr_other, lambda_lr_embedding])
    #scheduler = StepLR(step_size=8, gamma=0.1)
    scheduler = ReduceLROnPlateau(monitor='val_loss',
                                  mode='min',
                                  patience=2,
                                  verbose=True)
    callbacks = [scheduler]

    exp = Experiment(config.dir_experiment,
                     network,
                     device=device,
                     optimizer=optimizer,
                     loss_function=None,
                     batch_metrics=['acc'])

    exp.train(train_generator,
              valid_generator,
              epochs=config.epoch,
              lr_schedulers=callbacks)
    exp.test(test_generator)

    steps = len(test_generator)
    test_loss, test_metrics, pred_y, true_y = exp.model.evaluate_generator(
        test_generator,
        return_pred=True,
        return_ground_truth=True,
        steps=steps)

    # Turn per-batch scores into predicted class indices before counting.
    pred_y = np.argmax(np.concatenate(pred_y), 1)
    true_y = np.concatenate(true_y)
    true_syn, false_syn, false_anto, true_anto = confusion_matrix(
        true_y, pred_y).ravel()
    print(true_syn, false_syn, false_anto, true_anto)

    learning_visualizer = LearningVisualizer(exp, config.epoch)
    learning_visualizer.visualize_learning()

    exp._load_best_checkpoint()

    for file in config.evaluations_file:
        print(evaluation(file, vec_model.vocab, exp.model.model.embedding))

    # Baseline: the same evaluation on a fresh copy of the pre-trained vectors.
    vec_model_initial = KeyedVectors.load_word2vec_format(
        config.pretrained_embs[0], limit=500000)
    original_weights = torch.FloatTensor(vec_model_initial.vectors)
    # ``Tensor.to`` returns a new tensor, so the former bare
    # ``original_weights.to("cuda")`` had no effect. Move the embedding module
    # instead, onto the device used for training, so this also works on a
    # CPU-only machine or with a GPU index other than the default one.
    original_embs = nn.Embedding.from_pretrained(original_weights).to(device)
    original_embs.weight.requires_grad = False

    for file in config.evaluations_file:
        print(evaluation(file, vec_model.vocab, original_embs))
Exemplo n.º 9
0
    def test(
        self,
        test_dataset_container: DatasetContainer,
        batch_size: int = 32,
        num_workers: int = 1,
        callbacks: Union[List, None] = None,
        seed: int = 42,
    ) -> Dict:
        # pylint: disable=too-many-arguments, too-many-locals
        """
        Method to test a retrained or a pre-trained model using a dataset with the default tags. If you test a
        retrained model with different prediction tags, we will use those tags.

        Args:
            test_dataset_container (~deepparse.dataset_container.DatasetContainer):
                The test dataset container of the data to use.
            batch_size (int): The size of the batch (default is 32).
            num_workers (int): Number of workers to use for the data loader (default is 1 worker).
            callbacks (Union[list, None]): List of callbacks to use during testing.
                See Poutyne `callback <https://poutyne.org/callbacks.html#callback-class>`_ for more information.
                By default, we set no callback.
            seed (int): Seed to use (by default 42).
        Return:
            A dictionary with the stats (see `Experiment class
            <https://poutyne.org/experiment.html#poutyne.Experiment.train>`_ for details).

        Raises:
            ValueError: If the model type contains ``fasttext-light``, or if
                ``test_dataset_container.is_a_train_container()`` is false.

        Note:
            We use NLL loss and accuracy as in the `article <https://arxiv.org/abs/2006.16152>`_.

        Examples:

            .. code-block:: python

                address_parser = AddressParser(device=0) #on gpu device 0
                data_path = 'path_to_a_pickle_test_dataset.p'

                test_container = PickleDatasetContainer(data_path, is_training_container=False)

                address_parser.test(test_container) # We test the model on the data

            You can also test your fine-tuned model

            .. code-block:: python

                address_components = {"ATag":0, "AnotherTag": 1, "EOS": 2}

                address_parser = AddressParser(device=0) #on gpu device 0

                # Train phase
                data_path = 'path_to_a_pickle_train_dataset.p'

                train_container = PickleDatasetContainer(data_path)

                address_parser.retrain(train_container, 0.8, epochs=1, batch_size=128,
                    prediction_tags=address_components)

                # Test phase
                data_path = 'path_to_a_pickle_test_dataset.p'

                test_container = PickleDatasetContainer(data_path, is_training_container=False)

                address_parser.test(test_container) # Test the retrained model

        """
        if "fasttext-light" in self.model_type:
            # The two literals are concatenated, so the first one must end with a space.
            raise ValueError(
                "It's not possible to test a fasttext-light due to pymagnitude problem. See Retrain method "
                "doc for more details.")

        # NOTE(review): the docstring examples build the container with
        # ``is_training_container=False``; confirm such a container passes this check.
        if not test_dataset_container.is_a_train_container():
            raise ValueError("The dataset container is not a train container.")

        callbacks = [] if callbacks is None else callbacks
        data_transform = self._set_data_transformer()

        test_generator = DataLoader(
            test_dataset_container,
            collate_fn=data_transform.output_transform,
            batch_size=batch_size,
            num_workers=num_workers,
        )

        exp = Experiment(
            "./checkpoint",
            self.model,
            device=self.device,
            loss_function=nll_loss,
            batch_metrics=[accuracy],
            logging=False,
        )  # We set logging to false since we don't need it

        test_res = exp.test(test_generator,
                            seed=seed,
                            callbacks=callbacks,
                            verbose=self.verbose)

        return test_res
Exemplo n.º 10
0
    def retrain(
        self,
        dataset_container: DatasetContainer,
        train_ratio: float = 0.8,
        batch_size: int = 32,
        epochs: int = 5,
        num_workers: int = 1,
        learning_rate: float = 0.01,
        callbacks: Union[List, None] = None,
        seed: int = 42,
        logging_path: str = "./checkpoints",
        disable_tensorboard: bool = True,
        prediction_tags: Union[Dict, None] = None,
        seq2seq_params: Union[Dict, None] = None,
        layers_to_freeze: Union[str, None] = None,
    ) -> List[Dict]:
        # pylint: disable=too-many-arguments, line-too-long, too-many-locals, too-many-branches, too-many-statements
        """
        Method to retrain the address parser model using a dataset with the same tags. We train using
        `experiment <https://poutyne.org/experiment.html>`_ from `poutyne <https://poutyne.org/index.html>`_
        framework. The experiment module allows us to save checkpoints (``ckpt``, in a pickle format) and a log.tsv
        where the best epochs can be found (the best epoch is used for the test). The retrained model file name is
        formatted as ``retrained_{model_type}_address_parser.ckpt``. For example, if you retrain a fasttext model,
        the file name will be ``retrained_fasttext_address_parser.ckpt``. The retrained saved model includes, in a
        dictionary format, the model weights, the model type, if new ``prediction_tags`` were used, the new
        prediction tags, and if new ``seq2seq_params`` were used, the new seq2seq parameters.

        Args:
            dataset_container (~deepparse.dataset_container.DatasetContainer): The dataset container of the data to use
                such as any PyTorch Dataset (:class:`~torch.utils.data.Dataset`) user define class or one of our two
                DatasetContainer (:class:`~deepparse.dataset_container.PickleDatasetContainer` or
                :class:`~deepparse.dataset_container.CSVDatasetContainer`)
            train_ratio (float): The ratio to use of the dataset for the training. The rest of the data is used for the
                validation (e.g. a train ratio of 0.8 mean a 80-20 train-valid split) (default is 0.8).
            batch_size (int): The size of the batch (default is 32).
            epochs (int): number of training epochs (default is 5).
            num_workers (int): Number of workers to use for the data loader (default is 1 worker).
            learning_rate (float): The learning rate (LR) to use for training (default 0.01).
            callbacks (Union[list, None]): List of callbacks to use during training.
                See Poutyne `callback <https://poutyne.org/callbacks.html#callback-class>`_ for more information. By
                default, we set no callback.
            seed (int): Seed to use (by default 42).
            logging_path (str): The logging path for the checkpoints. Poutyne will use the best one and reload the
                state if any checkpoints are there. Thus, an error will be raised if you change the model type.
                For example,  you retrain a FastText model and then retrain a BPEmb in the same logging path directory.
                By default, the path is ``./checkpoints``.
            disable_tensorboard (bool): To disable Poutyne automatic Tensorboard monitoring. By default, we disable them
                (true).
            prediction_tags (Union[dict, None]): A dictionary where the keys are the address components
                (e.g. street name) and the values are the components indices (from 0 to N + 1) to use during retraining
                of a model. The ``+ 1`` corresponds to the End Of Sequence (EOS) token that needs to be included in the
                dictionary. We will use the length of this dictionary for the output size of the prediction layer.
                We also save the dictionary to be used later on when you load the model. Default is None, meaning
                we use our pre-trained model prediction tags.
            seq2seq_params (Union[dict, None]): A dictionary of seq2seq parameters to modify the seq2seq architecture
                to train. Note that if you change the seq2seq parameters, a new model will be trained from scratch.
                The dictionary you pass is not modified. Parameters that can be modified are:

                    - The ``input_size`` of the encoder (i.e. the embeddings size). The default value is 300.
                    - The size of the ``encoder_hidden_size`` of the encoder. The default value is 1024.
                    - The number of ``encoder_num_layers`` of the encoder. The default value is 1.
                    - The size of the ``decoder_hidden_size`` of the decoder. The default value is 1024.
                    - The number of ``decoder_num_layers`` of the decoder. The default value is 1.
                Default is None, meaning we use the default seq2seq architecture.
            layers_to_freeze (Union[str, None]): Name of the portion of the seq2seq to freeze layers,
                thus reducing the number of parameters to learn. Will be ignored if ``seq2seq_params`` is not None.
                Possible freezing settings are:

                    - ``None``: No layers are frozen.
                    - 'encoder': To freeze the encoder part of the seq2seq. That is the part that encodes the address
                    into a more dense representation.
                    - 'decoder': To freeze the decoder part of the seq2seq. That is the part that decodes a dense
                    address representation.
                    - 'prediction_layer': To freeze the last layer that predicts a tag class (i.e. a fully connected
                    with an output size of the same length as the prediction tags).
                    - 'seq2seq': To freeze the encoder and decoder but **not** the prediction layer.

               Default is ``None``, meaning we do not freeze any layers.

        Return:
            A list of dictionary with the best epoch stats (see `Experiment class
            <https://poutyne.org/experiment.html#poutyne.Experiment.train>`_ for details).

        Raises:
            ValueError: If the model type contains ``fasttext-light``, if the dataset container is not a train
                container, if ``prediction_tags`` has no ``EOS`` key, or if training fails with a ``RuntimeError``
                while a retrained address parser is already present in ``logging_path``.
            RuntimeError: If training fails and no retrained address parser explains the failure.

        Note:
            We recommend using a learning rate scheduler procedure during retraining to reduce the chance
            of losing too much of our learned weights, thus increasing retraining time. We
            personally use the following ``poutyne.StepLR(step_size=1, gamma=0.1)``.
            Also, starting learning rate should be relatively low (i.e. 0.01 or lower).

        Note:
            We use SGD optimizer, NLL loss and accuracy as a metric, the data is shuffled, and we use teacher forcing
            during training (with a prob of 0.5) as in the `article <https://arxiv.org/abs/2006.16152>`_.

        Note:
            Due to pymagnitude, we could not train using the Magnitude embeddings, meaning it's not possible to
            train using the fasttext-light model. But, since we don't update the embeddings weights, one can retrain
            using the fasttext model and later on use the weights with the fasttext-light.

        Note:
            When retraining a model, Poutyne will create checkpoints. After the training, we use the best checkpoint
            in a directory as the model to load. Thus, if you train two different models in the same directory,
            the second retrain will not work due to model differences.

        Examples:

            .. code-block:: python

                address_parser = AddressParser(device=0) #on gpu device 0
                data_path = 'path_to_a_pickle_dataset.p'

                container = PickleDatasetContainer(data_path)

                address_parser.retrain(container, 0.8, epochs=1, batch_size=128)

            Using the freezing layers parameters to freeze layers during training

            .. code-block:: python

                address_parser = AddressParser(device=0)
                data_path = 'path_to_a_csv_dataset.p'

                container = CSVDatasetContainer(data_path)
                address_parser.retrain(container, 0.8, epochs=5, batch_size=128, layers_to_freeze="encoder")

            Using learning rate scheduler callback.

            .. code-block:: python

                import poutyne

                address_parser = AddressParser(device=0)
                data_path = 'path_to_a_csv_dataset.p'

                container = CSVDatasetContainer(data_path)

                lr_scheduler = poutyne.StepLR(step_size=1, gamma=0.1) # reduce LR by a factor of 10 each epoch
                address_parser.retrain(container, 0.8, epochs=5, batch_size=128, callbacks=[lr_scheduler])

            Using your own prediction tags dictionary.

            .. code-block:: python

                address_components = {"ATag":0, "AnotherTag": 1, "EOS": 2}

                address_parser = AddressParser(device=0) #on gpu device 0
                data_path = 'path_to_a_pickle_dataset.p'

                container = PickleDatasetContainer(data_path)

                address_parser.retrain(container, 0.8, epochs=1, batch_size=128, prediction_tags=address_components)

            Using your own seq2seq parameters.

            .. code-block:: python

                seq2seq_params = {"encoder_hidden_size": 512, "decoder_hidden_size": 512}

                address_parser = AddressParser(device=0) #on gpu device 0
                data_path = 'path_to_a_pickle_dataset.p'

                container = PickleDatasetContainer(data_path)

                address_parser.retrain(container, 0.8, epochs=1, batch_size=128, seq2seq_params=seq2seq_params)


            Using your own seq2seq parameters and prediction tags dictionary.

            .. code-block:: python

                seq2seq_params = {"encoder_hidden_size": 512, "decoder_hidden_size": 512}
                address_components = {"ATag":0, "AnotherTag": 1, "EOS": 2}

                address_parser = AddressParser(device=0) #on gpu device 0
                data_path = 'path_to_a_pickle_dataset.p'

                container = PickleDatasetContainer(data_path)

                address_parser.retrain(container, 0.8, epochs=1, batch_size=128, seq2seq_params=seq2seq_params,
                    prediction_tags=address_components)

        """
        if "fasttext-light" in self.model_type:
            raise ValueError(
                "It's not possible to retrain a fasttext-light due to pymagnitude problem."
            )

        if not dataset_container.is_a_train_container():
            raise ValueError("The dataset container is not a train container.")

        model_factory_dict = {
            "prediction_layer_len": 9
        }  # We set the default output dim size

        if prediction_tags is not None:
            # Handle prediction tags
            if "EOS" not in prediction_tags:
                raise ValueError(
                    "The prediction tags dictionary is missing the EOS tag.")

            fields = [field for field in prediction_tags if field != "EOS"]
            formatted_parsed_address.FIELDS = fields

            self.tags_converter = TagsConverter(prediction_tags)

            if not self.model.same_output_dim(self.tags_converter.dim):
                # Since we have change the output layer dim, we need to handle the prediction layer dim
                new_dim = self.tags_converter.dim
                if seq2seq_params is None:
                    self.model.handle_new_output_dim(new_dim)
                else:
                    # We update the output dim size
                    model_factory_dict.update(
                        {"prediction_layer_len": new_dim})

        if seq2seq_params is not None:
            # Handle seq2seq params
            # We set the flag to use the pre-trained weights to false since we train new ones.
            # A copy is used so the dictionary owned by the caller is left untouched; the copy
            # (flag included) is what gets passed to the factory and saved in the checkpoint.
            seq2seq_params = {**seq2seq_params, "pre_trained_weights": False}

            model_factory_dict.update({"seq2seq_kwargs": seq2seq_params})
            # We set verbose to false since model is reloaded
            self._model_factory(verbose=False,
                                path_to_retrained_model=None,
                                **model_factory_dict)

        callbacks = [] if callbacks is None else callbacks
        train_generator, valid_generator = self._create_training_data_generator(
            dataset_container, train_ratio, batch_size, num_workers, seed=seed)

        if layers_to_freeze is not None and seq2seq_params is None:
            # We ignore the layers to freeze if seq2seq_params is not None
            self._freeze_model_params(layers_to_freeze)

        optimizer = SGD(self.model.parameters(), learning_rate)

        exp = Experiment(
            logging_path,
            self.model,
            device=self.device,
            optimizer=optimizer,
            loss_function=nll_loss,
            batch_metrics=[accuracy],
        )

        # Compare (major, minor) as integers: a float comparison would read "1.10" as 1.1
        # and wrongly treat it as older than 1.8.
        poutyne_major_minor = tuple(
            int(part)
            for part in str(poutyne.version.__version__).split(".")[:2])
        with_capturing_context = poutyne_major_minor < (1, 8)
        if with_capturing_context:
            print(
                "You are using a older version of Poutyne that does not support properly error management."
                " Due to that, we cannot show retrain progress. To fix that, update Poutyne to "
                "the newest version.")

        try:
            train_res = self._retrain(
                experiment=exp,
                train_generator=train_generator,
                valid_generator=valid_generator,
                epochs=epochs,
                seed=seed,
                callbacks=callbacks,
                disable_tensorboard=disable_tensorboard,
                capturing_context=with_capturing_context,
            )
        except RuntimeError as error:
            if os.listdir(path=".") and pretrained_parser_in_directory(
                    logging_path):
                # Mean we might already have checkpoint in the training directory
                files_in_directory = get_files_in_directory(logging_path)
                retrained_address_parser_in_directory = get_address_parser_in_directory(
                    files_in_directory)[0].split("_")[1]
                if self.model_type != retrained_address_parser_in_directory:
                    raise ValueError(
                        f"You are currently training a {self.model_type} in the directory "
                        f"{logging_path} where a different retrained "
                        f"{retrained_address_parser_in_directory} is currently his."
                        f" Thus, the loading of the model is failing. Change directory to retrain the"
                        f" {self.model_type}.") from error
                raise ValueError(
                    f"You are currently training a different {self.model_type} version from"
                    f" the one in the {logging_path}. Verify version."
                ) from error
            # No more specific diagnosis applies: propagate the training error instead of
            # silently returning None.
            raise
        else:
            file_path = os.path.join(
                logging_path,
                f"retrained_{self.model_type}_address_parser.ckpt")
            torch_save = {
                "address_tagger_model": exp.model.network.state_dict(),
                "model_type": self.model_type,
            }
            if seq2seq_params is not None:
                # Means we have changed the seq2seq params
                torch_save.update({"seq2seq_params": seq2seq_params})
            if prediction_tags is not None:
                #  Means we have changed the predictions tags
                torch_save.update({"prediction_tags": prediction_tags})

            torch.save(torch_save, file_path)
            return train_res
# Network handed to the Experiment below; arguments are positional as in the model's signature.
lstm = HierarchicalLSTMWithLearnableLossWithDropout(
    sentence_hidden_state_size,
    paragraph_hidden_state_size,
    300,
    2,
    nb_layers,
    drop_out,
)


class IterateLossFunctionOnEpoch(Callback):
    """Callback that switches the network's loss type every ``epoch_number`` epochs."""

    def __init__(self, epoch_number):
        """Store the switching period (in epochs) and initialise the base callback."""
        self.epoch_number = epoch_number
        super().__init__()

    def on_epoch_end(self, epoch_number, logs):
        """Switch the loss when ``epoch_number`` is a multiple of the stored period.

        On such epochs the network's ``loss`` attribute is written to
        ``logs["loss"]``, the network switches its loss type, and the model's
        loss function is replaced by the network's new one.
        """
        if epoch_number % self.epoch_number != 0:
            return

        network = self.model.network
        logs["loss"] = network.loss
        network.switch_loss_type()
        self.model.loss_function = network.loss_function


# Experiment monitoring ``val_fscore_macro`` with mode ``max``.
experiment = Experiment(
    "model_weights/hc_{}_{}_{}".format(aggregation, dataset, test_name),
    lstm,
    optimizer=optimizer,
    device=0,
    loss_function=loss_functon,
    monitor_metric="val_fscore_macro",
    monitor_mode="max",
    epoch_metrics=[TopLevelAccuracy(), FBetaTopLevel(average='macro')],
)
# monitor_metric="top_level_accuracy", monitor_mode="max"
experiment.train(
    train_loader,
    valid_loader,
    lr_schedulers=[ReduceLROnPlateau(patience=patience, cooldown=cooldown)],
    epochs=epoch,
)
# BestModelRestoreOnEpoch(epoch=50,monitor="val_fscore_macro", mode="max" )


import tqdm
import json
import spacy

# Load the spaCy pipeline named 'en_core_web_sm' once, at module level.
nlp = spacy.load('en_core_web_sm')

# for index, paragraph in enumerate(test_pickled):
#     current_test_ids = ["{}-{}-{}-{}".format(doc_id, par_id, sentence_id) for doc_id, par_id, sentence_id, _, _ in paragraph]
#     current_test_context = [text for _, _, _, text, _ in test_pickled]
#     current_preprocessed_test = [datapoint for _, _, _, _, datapoint in test_pickled]
Exemplo n.º 12
0
    paragraph_true_length = np.array(paragraph_true_length, dtype=np.int32)

    sentence_true_length_tensor = from_numpy(sentence_true_length)
    paragraph_true_length_tensor = from_numpy(paragraph_true_length)

    y_sentence_tensor = from_numpy(y_sentence)
    y_paragraph_tensor = from_numpy(y_paragraph)

    return (pad_sequence(x, batch_first=True), sentence_true_length_tensor, y_sentence_tensor, paragraph_true_length_tensor), y_paragraph_tensor

# Batches of 4 built with the padding collate function; only the validation loader disables shuffling.
train_loader = PerClassLoader(
    train,
    batch_size=4,
    k=-1,
    collate_fn=reformat_and_pad_batch,
)
valid_loader = PerClassLoader(
    valid,
    batch_size=4,
    k=-1,
    shuffle=False,
    collate_fn=reformat_and_pad_batch,
)

lstm = HierarchicalLSTMwithDropout(
    sentence_hidden_state_size,
    paragraph_hidden_state_size,
    300,
    2,
    nb_layers,
    dropout,
)

# Classification experiment monitoring ``val_fscore_macro`` with mode ``max``.
experiment = Experiment(
    "model_weights/final_{}_{}_{}".format(aggregation, dataset, test_name),
    lstm,
    optimizer="adam",
    loss_function="cross_entropy",
    device=0,
    task="classification",
    monitor_metric="val_fscore_macro",
    monitor_mode="max",
    epoch_metrics=[FBeta(average='macro')],
)
experiment.train(
    train_loader,
    valid_loader,
    lr_schedulers=[ReduceLROnPlateau(patience=3)],
    epochs=epoch,
)



import tqdm
import json
import spacy

# Load the spaCy pipeline named 'en_core_web_sm' once, at module level.
nlp = spacy.load('en_core_web_sm')

# for index, paragraph in enumerate(test_pickled):
#     current_test_ids = ["{}-{}-{}-{}".format(doc_id, par_id, sentence_id) for doc_id, par_id, sentence_id, _, _ in paragraph]
#     current_test_context = [text for _, _, _, text, _ in test_pickled]
#     current_preprocessed_test = [datapoint for _, _, _, _, datapoint in test_pickled]
Exemplo n.º 13
0
def run(args):
    """Train a ``SANet`` classifier on a text dataset and evaluate it on the test split.

    Args:
        args: Parsed options. The attributes read here are ``verbose``,
            ``device``, ``dataset``, ``words``, ``vector``, ``freq``, ``len``,
            ``pos``, ``batch``, ``hidden``, ``blocks``, ``lr``, ``momentum``,
            ``exp`` and ``epochs``.
    """
    # Logging
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)

    # Seed NumPy and PyTorch with the same fixed value.
    random_seed = 42
    np.random.seed(random_seed)
    torch.manual_seed(random_seed)

    # Work around a PyTorch behaviour where memory is still allocated on GPU0
    # when asked to allocate memory on GPU1.
    if torch.cuda.is_available():
        torch.cuda.set_device(args.device)
    if torch.cuda.is_available():
        device = torch.device('cuda:%d' % args.device)
    else:
        device = torch.device('cpu')

    # Dataset construction and pre-processing
    dataset = TextDatasetBuilder(
        name=args.dataset,
        word_vectors=args.words,
        vector_size=args.vector,
        random_state=random_seed,
    )
    logging.debug("Dataset built.")

    dataset.pre_process(min_freq=args.freq, max_len=args.len)
    embeddings = dataset.build_embeddings()
    logging.debug("Vocab size {}".format(len(dataset.vocab)))

    # The positional-encoding length is only set when ``args.pos`` is truthy.
    pos_enc_len = args.len if args.pos else None

    traind, validd, testd = dataset.get_train_valid_test()
    logging.debug("Split: train = {}, valid = {} and test = {}".format(len(traind), len(validd), len(testd)))

    def build_loader(split, shuffle):
        """Wrap ``split`` in a padded-batch ``DataLoader``."""
        return DataLoader(split,
                          batch_size=args.batch,
                          shuffle=shuffle,
                          collate_fn=collate_padding)

    # Only the training split is shuffled.
    train_loader = build_loader(traind, True)
    valid_loader = build_loader(validd, False)
    test_loader = build_loader(testd, False)

    model = SANet(
        input_size=args.vector,
        hidden_size=args.hidden,
        n_classes=len(dataset.classes),
        embeddings=embeddings,
        n_blocks=args.blocks,
        pos_enc_len=pos_enc_len,
    )
    init_model(model)

    # Word embeddings get a tenth of the learning rate and no momentum setting;
    # every other parameter uses the full learning rate with momentum.
    non_embedding_params = [
        param for name, param in model.named_parameters()
        if name != 'word_embedding.weight'
    ]
    embedding_group = {
        'params': model.word_embedding.parameters(),
        'lr': args.lr * 0.1,
    }
    other_group = {
        'params': non_embedding_params,
        'lr': args.lr,
        'momentum': args.momentum,
    }
    optimizer = optim.SGD([embedding_group, other_group])

    # Preparing results output
    expt_path = join(RESULTS_PATH, args.dataset, args.exp)
    expt = Experiment(expt_path,
                      model,
                      device=device,
                      logging=True,
                      optimizer=optimizer,
                      task='classifier')

    # Halve the learning rate when the monitored 'loss' stops improving.
    reduce_lr = ReduceLROnPlateau(
        monitor='loss',
        mode='min',
        patience=2,
        factor=0.5,
        threshold_mode='abs',
        threshold=1e-3,
        verbose=True,
    )

    expt.train(train_loader,
               valid_loader,
               epochs=args.epochs,
               lr_schedulers=[reduce_lr])

    expt.test(test_loader)

    print("### DONE ###")
def main(args):
    """Train a segmentation network, test it, then export per-image results.

    The function runs the following steps in order:

    1. Compute the normalisation statistics (``mean``/``std``) and the
       positive-class weights of the images found under
       ``<root_dir>/Images/1024_with_jedi`` and print them.
    2. Build the datasets, obtain the train/valid/test index splits from
       ``get_splits`` and train an ``fcn_resnet50`` module through an
       ``Experiment`` stored in ``<root_dir>/moar``, monitoring ``val_miou``.
    3. Evaluate the experiment on the test split.
    4. Reload checkpoint 11 (hard-coded), run the module over every image of
       the dataset, and for each image save a side-by-side figure
       (input / target / prediction) and a grayscale mask (0, 127, 255 for
       classes 0, 1, 2).
    5. Write the per-image statistics to the tab-separated file
       ``<root_dir>/Images/results/moar/final_stats.csv``.

    Args:
        args: Namespace-like object exposing ``root_dir`` (base directory of
            the data and results) and ``device`` (anything accepted by
            ``torch.device``).
    """
    image_dir = os.path.join(args.root_dir, 'Images/1024_with_jedi')

    raw_dataset = RegressionDatasetFolder(image_dir,
                                          input_only_transform=None,
                                          transform=Compose([ToTensor()]))
    mean, std = compute_mean_std(raw_dataset)
    print(mean)
    print(std)
    pos_weights = compute_pos_weight(raw_dataset)
    print(pos_weights)
    test_dataset = RegressionDatasetFolder(
        image_dir,
        input_only_transform=Compose([Normalize(mean, std)]),
        transform=Compose(
            [Lambda(lambda img: pad_resize(img, 1024, 1024)),
             ToTensor()]),
        in_memory=True)

    valid_dataset = RegressionDatasetFolder(
        image_dir,
        input_only_transform=Compose([Normalize(mean, std)]),
        transform=Compose([ToTensor()]),
        include_fname=True)

    train_split, valid_split, test_split, train_weights = get_splits(
        valid_dataset)
    valid_loader = DataLoader(Subset(test_dataset, valid_split),
                              batch_size=8,
                              num_workers=8,
                              pin_memory=False)

    # module = deeplabv3_efficientnet(n=5)
    module = fcn_resnet50(dropout=0.8)
    # module = deeplabv3_resnet50()

    # Built once and reused for the experiment and for every inference batch.
    device = torch.device(args.device)

    optimizer = torch.optim.Adam(module.parameters(),
                                 lr=5e-4,
                                 weight_decay=2e-3)
    exp = Experiment(directory=os.path.join(args.root_dir, 'moar'),
                     module=module,
                     device=device,
                     optimizer=optimizer,
                     loss_function=LovaszSoftmax(),
                     metrics=[miou, PixelWiseF1(None)],
                     monitor_metric='val_miou',
                     monitor_mode='max')

    lr_schedulers = [
        ReduceLROnPlateau(monitor='val_miou',
                          mode='max',
                          factor=0.2,
                          patience=3,
                          threshold=1e-1,
                          threshold_mode='abs')
    ]
    callbacks = [
        EarlyStopping(monitor='val_miou',
                      min_delta=1e-1,
                      patience=8,
                      verbose=True,
                      mode='max')
    ]

    # Single (crop size, batch size) stage; the zip form allows adding more
    # stages, each one training up to a larger cumulative epoch count.
    for i, (crop_size, batch_size) in enumerate(zip([512], [5])):
        train_loader = get_loader_for_crop_batch(crop_size, batch_size,
                                                 train_split, mean, std,
                                                 train_weights, args.root_dir)

        exp.train(train_loader=train_loader,
                  valid_loader=valid_loader,
                  epochs=(1 + i) * 30,
                  lr_schedulers=lr_schedulers,
                  callbacks=callbacks)

    raw_dataset.print_filenames()

    # Same images without normalisation: used only for display and targets.
    pure_dataset = RegressionDatasetFolder(image_dir,
                                           transform=Compose([ToTensor()]),
                                           include_fname=True)

    test_loader = DataLoader(Subset(test_dataset, test_split),
                             batch_size=8,
                             num_workers=8,
                             pin_memory=False)
    # From here on `valid_loader` iterates the WHOLE normalised dataset, one
    # image at a time, in lockstep with `pure_loader`.
    valid_loader = DataLoader(valid_dataset,
                              batch_size=1,
                              num_workers=8,
                              pin_memory=False)
    pure_loader = DataLoader(pure_dataset,
                             batch_size=1,
                             num_workers=8,
                             pin_memory=False)

    exp.test(test_loader)

    # for checkpoint in [11, 15, 16, 17, 21]:
    #     print("Testing checkpoint {}".format(checkpoint))
    #     exp.load_checkpoint(checkpoint)
    #     test_model_on_checkpoint(exp.model, test_loader)

    # NOTE(review): the checkpoint number is hard-coded — confirm it still
    # matches the run whose results are being exported.
    exp.load_checkpoint(11)
    module = exp.model.model
    module.eval()

    generate_output_folders(args.root_dir)

    splits = [(train_split, 'train'), (valid_split, 'valid'),
              (test_split, 'test')]

    results_csv = [[
        'Name', 'Type', 'Split', 'iou_nothing', 'iou_bark', 'iou_node',
        'iou_mean', 'f1_nothing', 'f1_bark', 'f1_node', 'f1_mean',
        'Output Bark %', 'Output Node %', 'Target Bark %', 'Target Node %'
    ]]

    # Loop invariants: panel titles and class labels (index == class id).
    names = ['Input', 'Target', 'Generated image']
    class_names = ['Nothing', 'Bark', 'Node']

    with torch.no_grad():
        for image_number, (batch, pure_batch) in enumerate(
                zip(valid_loader, pure_loader)):
            input_img = pure_batch[0]
            target = pure_batch[1]
            fname = pure_batch[2][0]
            wood_type = pure_batch[3][0]

            del pure_batch

            outputs = module(batch[0].to(device))
            outputs = remove_small_zones(outputs)

            del batch

            try:
                class_accs = iou(outputs, target.to(device))
                f1s = PixelWiseF1('all')(outputs, target) * 100

                acc = class_accs.mean()
                f1 = f1s.mean()
            except ValueError:
                # Report which file failed, then let the error propagate
                # with its original traceback.
                print('Error on file {}'.format(fname))
                print(outputs.shape)
                print(target.shape)
                raise

            # Per-pixel class index from the per-class scores.
            outputs = torch.argmax(outputs, dim=1)

            imgs = [input_img, target, outputs]
            imgs = [img.detach().cpu().squeeze().numpy() for img in imgs]

            fig, axs = plt.subplots(1, 3)

            for ax, img, title in zip(axs.flatten(), imgs, names):
                raw = (len(img.shape) == 3)

                if raw:  # Raw input
                    # Channels-first -> channels-last for imshow.
                    img = img.transpose(1, 2, 0)

                plotted_img = ax.imshow(img, vmax=2)
                ax.set_title(title)
                ax.axis('off')

                if not raw:  # Predicted image
                    # Only label maps need a legend; the last one plotted
                    # (the prediction) provides the handles used below.
                    values = np.unique(img.ravel())
                    patches = [
                        mpatches.Patch(
                            color=plotted_img.cmap(plotted_img.norm(value)),
                            label='{} zone'.format(class_names[value]))
                        for value in values
                    ]

            suptitle = 'Mean iou : {:.3f}\n'.format(acc)

            # NOTE(review): if an index belongs to no split, `split` keeps
            # the value from the previous image (or is unbound on the first
            # one) — confirm that get_splits covers every dataset index.
            for split_idxs, split_name in splits:
                if image_number in split_idxs:
                    split = split_name

            running_csv_stats = [fname, wood_type, split]

            for c, c_acc in zip(class_names, class_accs):
                suptitle += '{} : {:.3f};  '.format('iou_' + c, c_acc)
                running_csv_stats.append('{:.3f}'.format(c_acc))

            running_csv_stats.append('{:.3f}'.format(acc))
            suptitle += '\nMean f1 : {:.3f}\n'.format(f1)

            for c, c_f1 in zip(class_names, f1s):
                suptitle += '{} : {:.3f};  '.format('f1_' + c, c_f1)
                running_csv_stats.append('{:.3f}'.format(c_f1))

            running_csv_stats.append('{:.3f}'.format(f1))

            # Share of pixels (in %) predicted as bark (1) and node (2) ...
            for class_idx in [1, 2]:
                class_percent = (outputs == class_idx).float().mean().cpu()
                running_csv_stats.append('{:.5f}'.format(class_percent * 100))

            # ... and the same shares in the target.
            for class_idx in [1, 2]:
                class_percent = (target == class_idx).float().mean().cpu()
                running_csv_stats.append('{:.5f}'.format(class_percent * 100))

            fig.legend(handles=patches,
                       title='Classes',
                       bbox_to_anchor=(0.4, -0.2, 0.5, 0.5))
            plt.suptitle(suptitle)
            plt.tight_layout()
            # plt.show()
            # Format only the relative template so that braces in
            # `args.root_dir` cannot be mistaken for placeholders.
            plt.savefig(os.path.join(
                args.root_dir,
                'Images/results/moar/combined_images/{}/{}/{}'.format(
                    wood_type, split, fname)),
                        format='png',
                        dpi=900)
            plt.close()

            # Grayscale mask: 0 = nothing, 127 = bark, 255 = node.
            outputs = outputs.squeeze().cpu().numpy()
            dual_outputs = np.zeros((outputs.shape[0], outputs.shape[1]),
                                    dtype=np.uint8)
            dual_outputs[outputs == 1] = 127
            dual_outputs[outputs == 2] = 255

            dual = Image.fromarray(dual_outputs, mode='L')
            dual.save(
                os.path.join(
                    args.root_dir,
                    'Images/results/moar/outputs/{}/{}/{}'.format(
                        wood_type, split, fname)))

            results_csv.append(running_csv_stats)

    csv_file = os.path.join(args.root_dir, 'Images', 'results', 'moar',
                            'final_stats.csv')

    # newline='' lets the csv module control line endings itself (otherwise
    # an extra '\r' is written on platforms that translate '\n').
    with open(csv_file, 'w', newline='') as f:
        csv_writer = csv.writer(f, delimiter='\t')
        csv_writer.writerows(results_csv)
Exemplo n.º 15
0
from poutyne.framework import Experiment

# `task` is a shortcut: a custom loss function and metrics can be passed
# to `Experiment` in its place.
expt = Experiment('my_directory', network, optimizer='sgd', task='classifier')

# Train with a fixed seed, then evaluate on the held-out loader.
expt.train(train_loader, valid_loader, seed=42, epochs=epochs, callbacks=callbacks)
expt.test(test_loader)