Example #1
 def get_default_callbacks(self):
     return [
         ('epoch_timer', EpochTimer()),
         ('train_loss',
          EpochScoring(
              train_loss_score,
              name='train_loss',
              on_train=True,
          )),
         ('valid_loss', EpochScoring(
             valid_loss_score,
             name='valid_loss',
         )),
         ('valid_acc',
          EpochScoring(
              'accuracy',
              name='valid_acc',
              lower_is_better=False,
          )),
         ('print_log', PrintLog(sink=logger_info,
                                keys_ignored=[
                                    'batches',
                                ])),
         ('auc', EpochScoring(scoring='roc_auc', lower_is_better=False))
     ]
Example #2
 def _default_callbacks(self):
     return [
         ('epoch_timer', EpochTimer()),
         ('train_loss',
          PassthroughScoring(
              name='train_loss',
              on_train=True,
          )),
         ('valid_loss', PassthroughScoring(name='valid_loss')),
         # add train accuracy because by default, there is no valid split
         ('train_acc',
          EpochScoring(
              'accuracy',
              name='train_acc',
              lower_is_better=False,
              on_train=True,
          )),
         ('valid_acc',
          EpochScoring(
              'accuracy',
              name='valid_acc',
              lower_is_better=False,
          )),
         ('print_log', PrintLog()),
     ]
Example #3
def train(data_folder: str, out_model: str):
    out_model = Path(out_model)
    out_model.mkdir()

    data_paths = list(Path(data_folder).rglob("*.npy"))
    train_paths, valid_paths = train_test_split(data_paths, train_size=0.7)

    train_dataset = LibriSpeechDataset(
        train_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    valid_dataset = LibriSpeechDataset(
        valid_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    net = NeuralNet(Classifier,
                    module__n_classes=constants.NUMBER_OF_CLASSES,
                    criterion=nn.CrossEntropyLoss,
                    batch_size=8,
                    max_epochs=100,
                    optimizer=optim.Adam,
                    lr=0.001,
                    iterator_train__shuffle=True,
                    iterator_train__num_workers=2,
                    iterator_valid__shuffle=False,
                    iterator_valid__num_workers=2,
                    train_split=predefined_split(valid_dataset),
                    device="cuda",
                    callbacks=[
                        Checkpoint(
                            f_params=(out_model / "params.pt").as_posix(),
                            f_optimizer=(out_model / "optim.pt").as_posix(),
                            f_history=(out_model / "history.pt").as_posix()),
                        ProgressBar(postfix_keys=["train_loss", "train_acc"]),
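                        # EarlyStopping defaults to monitor='valid_loss' with patience=5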
                        EarlyStopping(),
                        EpochScoring(acc,
                                     name="val_acc",
                                     lower_is_better=False,
                                     on_train=False),
                        EpochScoring(acc,
                                     name="train_acc",
                                     lower_is_better=False,
                                     on_train=True),
                        Tensorboard((out_model / "train").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=True),
                        Tensorboard((out_model / "valid").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=False),
                    ])

    net.fit(train_dataset)
Example #4
File: net.py Project: rain1024/skorch
 def get_default_callbacks(self):
     return [
         ('epoch_timer', EpochTimer()),
         ('train_loss', EpochScoring(
             train_loss_score,
             name='train_loss',
             on_train=True,
         )),
         ('valid_loss', EpochScoring(
             valid_loss_score,
             name='valid_loss',
         )),
         ('print_log', PrintLog()),
     ]
Example #5
    def test_string_monitor_and_formatting(self, save_params_mock, net_cls,
                                           checkpoint_cls, data):
        def epoch_3_scorer(net, *_):
            return 1 if net.history[-1, 'epoch'] == 3 else 0

        from skorch.callbacks import EpochScoring
        scoring = EpochScoring(scoring=epoch_3_scorer, on_train=True)

        sink = Mock()
        cb = checkpoint_cls(
            monitor='epoch_3_scorer',
            f_params='model_{last_epoch[epoch]}_{net.max_epochs}.pt',
            f_optimizer='optimizer_{last_epoch[epoch]}_{net.max_epochs}.pt',
            sink=sink)
        net = net_cls(callbacks=[('my_score', scoring), cb])
        net.fit(*data)

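        # Single checkpoint trigger (epoch 3): f_params, f_optimizer and f_history are saved once each.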
        assert save_params_mock.call_count == 3
        assert cb.get_formatted_files(net) == {
            'f_params': 'model_3_10.pt',
            'f_optimizer': 'optimizer_3_10.pt',
            'f_history': 'history.json',
            'f_pickle': None
        }
        save_params_mock.assert_has_calls([
            call(f_params='model_3_10.pt'),
            call(f_optimizer='optimizer_3_10.pt'),
            call(f_history='history.json')
        ])
        assert sink.call_count == 1
        assert all((x is False) for x in net.history[:2, 'event_cp'])
        assert net.history[2, 'event_cp'] is True
        assert all((x is False) for x in net.history[3:, 'event_cp'])
Example #6
def performance_skorch(
    X_train,
    X_test,
    y_train,
    y_test,
    batch_size,
    device,
    lr,
    max_epochs,
):
    torch.manual_seed(0)
    net = NeuralNetClassifier(
        ClassifierModule,
        batch_size=batch_size,
        optimizer=torch.optim.Adadelta,
        lr=lr,
        device=device,
        max_epochs=max_epochs,
        callbacks=[
            ('tr_acc',
             EpochScoring(
                 'accuracy',
                 lower_is_better=False,
                 on_train=True,
                 name='train_acc',
             )),
        ],
    )
    net.fit(X_train, y_train)
    y_pred = net.predict(X_test)
    score = accuracy_score(y_test, y_pred)
    return score
Example #7
 def _parse_str_callback(self, cb_supplied_name):
     scoring = get_scorer(cb_supplied_name)
     scoring_name = scoring._score_func.__name__
     assert scoring_name.endswith(
                     ('_score', '_error', '_deviance', '_loss'))
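     # sklearn convention: '*_score' metrics (and 'neg_*' scorers) are
     # higher-is-better; '*_error', '*_deviance' and '*_loss' are lower-is-better.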
     if (scoring_name.endswith('_score') or
             cb_supplied_name.startswith('neg_')):
         lower_is_better = False
     else:
         lower_is_better = True
     train_name = f'train_{cb_supplied_name}'
     valid_name = f'valid_{cb_supplied_name}'
     if self.cropped:
         # TODO: use CroppedTimeSeriesEpochScoring when time series target
         # In case of cropped decoding we are using braindecode
         # specific scoring created for cropped decoding
         train_scoring = CroppedTrialEpochScoring(
             cb_supplied_name, lower_is_better, on_train=True, name=train_name
         )
         valid_scoring = CroppedTrialEpochScoring(
             cb_supplied_name, lower_is_better, on_train=False, name=valid_name
         )
     else:
         train_scoring = PostEpochTrainScoring(
             cb_supplied_name, lower_is_better, name=train_name
         )
         valid_scoring = EpochScoring(
             cb_supplied_name, lower_is_better, on_train=False, name=valid_name
         )
     named_by_user = True
     train_valid_callbacks = [
         (train_name, train_scoring, named_by_user),
         (valid_name, valid_scoring, named_by_user)
     ]
     return train_valid_callbacks
Example #8
def simple_pipeline_training_with_callbacks(x, y):
    # Trains the neural network within a scikit-learn pipeline.
    # The pipeline is composed of feature scaling followed by NN training.
    # Callbacks are added to compute "balanced accuracy" and "accuracy" during the training phase.

    # Initialize the EpochScoring callbacks
    balanced_accuracy = EpochScoring(scoring='balanced_accuracy', lower_is_better=False)
    accuracy = EpochScoring(scoring='accuracy', lower_is_better=False)

    # The neural net is initialized with fixed hyperparameters
    nn = NeuralNetClassifier(NeuralNet, max_epochs=10, lr=0.01, batch_size=12, optimizer=optim.RMSprop, callbacks=[balanced_accuracy, accuracy])
    # The pipeline wraps the scaling and training phases
    pipeline = Pipeline([('scale', StandardScaler()), ('nn', nn)])
    # Run the pipeline
    pipeline.fit(x, y)
Example #9
    def build_estimator(hyperparams, train_data, test=False):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]
        n_classes = len(np.unique(y))
        n_samples = y.shape[0]
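        # Balanced class weights: n_samples / (n_classes * count_per_class),
        # the same heuristic as sklearn's class_weight='balanced'.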
        bal_weights = torch.from_numpy(
            n_samples / (n_classes * np.bincount(y))).float().to(device)

        callbacks = [
            ('f1_score_valid',
             EpochScoring('f1' if n_classes == 2 else 'f1_macro',
                          name='valid_f1',
                          lower_is_better=False)),
            ('early_stopping',
             EarlyStopping(monitor='valid_loss',
                           patience=5,
                           lower_is_better=True)),
            (
                'learning_rate_scheduler',
                LRScheduler(
                    policy=lr_scheduler.ReduceLROnPlateau,
                    monitor='valid_loss',
                    # The following kwargs are passed to the
                    # LR scheduler constructor
                    mode='min',
                    min_lr=1e-5)),
        ]

        return NeuralNetClassifier(
            NNModule,
            criterion=nn.CrossEntropyLoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True,  # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5,
                                stratified=True,
                                random_state=RANDOM_STATE),
            lr=hyperparams['lr'],
            batch_size=hyperparams['batch_size'],
            module__in_features=in_features,
            module__n_classes=n_classes,
            module__n_layers=hyperparams['n_layers'],
            module__n_neuron_per_layer=hyperparams['n_neuron_per_layer'],
            module__activation=getattr(F, hyperparams['activation']),
            module__p_dropout=hyperparams['p_dropout'],
            criterion__weight=bal_weights
            if hyperparams['class_weight'] == 'balanced' else None,
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4)
Example #10
    def train_(self,
               train_set,
               valid_set,
               lr=5e-4,
               batch_size=16,
               max_nb_epochs=20,
               early_stopping_patience=5,
               early_stopping_monitor='valid_bal_acc'):
        # Train using a GPU if possible
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Callbacks
        train_bal_acc = EpochScoring(scoring='balanced_accuracy',
                                     on_train=True,
                                     name='train_bal_acc',
                                     lower_is_better=False)
        valid_bal_acc = EpochScoring(scoring='balanced_accuracy',
                                     on_train=False,
                                     name='valid_bal_acc',
                                     lower_is_better=False)
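        # The monitored metric is lower-is-better only when it is a loss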
        early_stopping = EarlyStopping(monitor=early_stopping_monitor,
                                       patience=early_stopping_patience,
                                       lower_is_better='loss'
                                       in early_stopping_monitor)
        callbacks = [
            ('train_bal_acc', train_bal_acc),
            ('valid_bal_acc', valid_bal_acc),
            ('progress_bar', ProgressBar()),
            ('early_stopping', early_stopping),
        ]

        # Skorch model creation
        skorch_net = EEGTransformer(self.to(device),
                                    criterion=torch.nn.CrossEntropyLoss,
                                    optimizer=torch.optim.Adam,
                                    optimizer__lr=lr,
                                    train_split=predefined_split(valid_set),
                                    batch_size=batch_size,
                                    callbacks=callbacks,
                                    device=device)

        # Training: `y` is None since it is already supplied in the dataset.
        skorch_net.fit(train_set, y=None, epochs=max_nb_epochs)

        return skorch_net
Example #11
File: net.py Project: rain1024/skorch
 def get_default_callbacks(self):
     return [
         ('epoch_timer', EpochTimer()),
         ('train_loss', EpochScoring(
             train_loss_score,
             name='train_loss',
             on_train=True,
         )),
         ('valid_loss', EpochScoring(
             valid_loss_score,
             name='valid_loss',
         )),
         ('valid_acc', EpochScoring(
             'accuracy',
             name='valid_acc',
             lower_is_better=False,
         )),
         ('print_log', PrintLog()),
     ]
Example #12
    def build_estimator(cls,
                        hyperparams,
                        train_data,
                        verbose=True,
                        test=False):  #  change default verbose to false later
        early_stopping_val_percent = 10

        n_training_examples = len(
            train_data[0]) * (1 - (early_stopping_val_percent / 100))
        n_iter_per_epoch = n_training_examples / hyperparams['batch_size']
        n_iter_btw_restarts = int(hyperparams['epochs_btw_restarts'] *
                                  n_iter_per_epoch)
        callbacks = [
            ('fix_seed', cls.FixRandomSeed(RANDOM_STATE)),
            ('lr_monitor', cls.LRMonitor()),
            ('accuracy_score_valid',
             EpochScoring('accuracy', lower_is_better=False, on_train=True)),
            ('early_stopping',
             EarlyStopping(monitor='valid_acc',
                           lower_is_better=False,
                           patience=100)),
            ('learning_rate_scheduler',
             LRScheduler(policy=cls.SkorchCosineAnnealingWarmRestarts,
                         T_0=n_iter_btw_restarts,
                         T_mult=hyperparams['epochs_btw_restarts_mult']))
        ]

        def validation_split(X, y):
            """ Custom split is used to apply augmentation to the training set only """
            splitter = CVSplit(cv=int(100 / early_stopping_val_percent),
                               random_state=RANDOM_STATE)
            dataset_train, dataset_valid = splitter(X)
            dataset_train = cls.AugmentedDataset(dataset_train)
            return dataset_train, dataset_valid

        return NeuralNetClassifier(
            cls.CifarCustomNet,
            criterion=nn.CrossEntropyLoss,
            optimizer=torch.optim.SGD,
            max_epochs=hyperparams['max_epochs'] if not test else 1,
            iterator_train__shuffle=True,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4,
            dataset=cls.NormalizedDataset,
            callbacks=callbacks,
            device=cls.device,
            train_split=validation_split,
            lr=hyperparams['learning_rate'],
            batch_size=hyperparams['batch_size'],
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=hyperparams['nesterov'],
            module__conv_dropout=hyperparams['conv_dropout'],
            module__fc_dropout=hyperparams['fc_dropout'],
            verbose=3 if verbose else 0)
Example #13
def fit_nn(args, X, y):

    print('Neural net:')
    auc = EpochScoring(scoring='roc_auc', lower_is_better=False)
    apr = EpochScoring(scoring='average_precision', lower_is_better=False)
    # lrs = LRScheduler(policy='StepLR', step_size=10, gamma=0.7)

    params = param_lookup[args.dataset]

    net = NeuralNetClassifier(FCNet,
                              criterion=torch.nn.NLLLoss,
                              optimizer=torch.optim.Adam,
                              iterator_train__shuffle=True,
                              module__n_input=1206,
                              callbacks=[auc, apr],
                              train_split=None,
                              verbose=0,
                              **params)

    # fit on full dataset and save model
    torch.manual_seed(1000)
    net.fit(X, y)

    # net.save_params(f_params=MODEL_DIR / f'nn_{args.dataset}.pkl')
    with open(MODEL_DIR / f'nn_{args.dataset}.pkl', 'wb') as f:
        pickle.dump(net, f)

    # generate in-dataset CV predictions
    kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1111)
    torch.manual_seed(1000)
    cv_scores = cross_val_predict(net,
                                  X,
                                  y,
                                  cv=kf,
                                  method='predict_proba',
                                  n_jobs=-1)
    AUC = roc_auc_score(y, cv_scores[:, 1])
    APR = average_precision_score(y, cv_scores[:, 1])
    print('\tAUC ', np.round(AUC, 4))
    print('\tAPR ', np.round(APR, 4))

    np.save(MODEL_DIR / f'nn_{args.dataset}.npy', cv_scores[:, 1])
Example #14
    def objective(trial):
        nl = trial.suggest_categorical('n_layer', [2, 3])
        bs = trial.suggest_categorical('batch_size', [256])
        l2 = trial.suggest_uniform('l2', 1e-8, 1e-3)
        lr = trial.suggest_uniform('lr', 5e-5, 5e-3)
        eps = trial.suggest_categorical('epochs', [30, 40, 50])
        drop = trial.suggest_uniform('dropout', 0, 0.2)
        nodes1 = trial.suggest_categorical('nodes1', [200, 300, 400, 500, 600])
        nodes2 = trial.suggest_categorical('nodes2', [200, 300, 400, 500, 600])
        nodes3 = trial.suggest_categorical('nodes3', [200, 300, 400, 500, 600])

        auc = EpochScoring(scoring='roc_auc', lower_is_better=False)
        apr = EpochScoring(scoring='average_precision', lower_is_better=False)
        # lrs = LRScheduler(policy='StepLR', step_size=10, gamma=0.75)

        net = NeuralNetClassifier(FCNet,
                                  batch_size=bs,
                                  criterion=torch.nn.NLLLoss,
                                  optimizer=torch.optim.Adam,
                                  optimizer__weight_decay=l2,
                                  lr=lr,
                                  max_epochs=eps,
                                  iterator_train__shuffle=True,
                                  module__n_input=1206,
                                  module__n_units=(nodes1, nodes2,
                                                   nodes3) if nl == 3 else
                                  (nodes1, nodes2),
                                  module__dropout=drop,
                                  callbacks=[auc, apr],
                                  train_split=None,
                                  verbose=0)

        kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1111)
        torch.manual_seed(1000)
        cv_scores = cross_val_predict(net,
                                      X,
                                      y,
                                      cv=kf,
                                      method='predict_proba',
                                      n_jobs=-1)
        return roc_auc_score(y, cv_scores[:, 1])
Example #15
def skorch_murder_bot(input_size=28,
                      hidden_layers: np.ndarray = np.array([100, 30]),
                      batch_size=5):
    auc = EpochScoring(scoring='roc_auc', lower_is_better=False)
    return NeuralNetClassifier(MurderBot,
                               module__input_size=input_size,
                               module__hidden_layers=hidden_layers,
                               max_epochs=30,
                               batch_size=batch_size,
                               lr=0.1,
                               callbacks=[auc],
                               verbose=False)
Example #16
def main():
    sampling_rate = 360

    wavelet = "mexh"  # mexh, morl, gaus8, gaus4
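    # CWT scales for target frequencies of 1-100 Hz: scale = f_center * fs / f_target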
    scales = pywt.central_frequency(wavelet) * sampling_rate / np.arange(
        1, 101, 1)

    (x1_train, x2_train, y_train,
     groups_train), (x1_test, x2_test, y_test,
                     groups_test) = load_data(wavelet=wavelet,
                                              scales=scales,
                                              sampling_rate=sampling_rate)
    print("Data loaded successfully!")

    log_dir = "./logs/{}".format(wavelet)
    shutil.rmtree(log_dir, ignore_errors=True)

    callbacks = [
        Initializer("[conv|fc]*.weight", fn=torch.nn.init.kaiming_normal_),
        Initializer("[conv|fc]*.bias",
                    fn=partial(torch.nn.init.constant_, val=0.0)),
        LRScheduler(policy=StepLR, step_size=5, gamma=0.1),
        EpochScoring(scoring=make_scorer(f1_score, average="macro"),
                     lower_is_better=False,
                     name="valid_f1"),
        TensorBoard(SummaryWriter(log_dir))
    ]
    net = NeuralNetClassifier(  # skorch is extensive package of pytorch for compatible with scikit-learn
        MyModule,
        criterion=torch.nn.CrossEntropyLoss,
        optimizer=torch.optim.Adam,
        lr=0.001,
        max_epochs=30,
        batch_size=1024,
        train_split=predefined_split(
            Dataset({
                "x1": x1_test,
                "x2": x2_test
            }, y_test)),
        verbose=1,
        device="cuda",
        callbacks=callbacks,
        iterator_train__shuffle=True,
        optimizer__weight_decay=0,
    )
    net.fit({"x1": x1_train, "x2": x2_train}, y_train)
    y_true, y_pred = y_test, net.predict({"x1": x1_test, "x2": x2_test})

    print(confusion_matrix(y_true, y_pred))
    print(classification_report(y_true, y_pred, digits=4))

    net.save_params(f_params="./models/model_{}.pkl".format(wavelet))
Example #17
def sim_fit_model(args, X, y):

    auc = EpochScoring(scoring='roc_auc', lower_is_better=False)
    apr = EpochScoring(scoring='average_precision', lower_is_better=False)
    lrs = LRScheduler(policy='StepLR', step_size=10, gamma=0.5)

    if args.model == 'standard':

        net = NeuralNetClassifier(models.MpraDense,
                                  batch_size=256,
                                  optimizer=torch.optim.Adam,
                                  optimizer__weight_decay=2e-6,
                                  lr=1e-4,
                                  max_epochs=20,
                                  module__n_input=1079,
                                  module__n_units=(400, 250),
                                  module__dropout=0.3,
                                  callbacks=[auc, apr],
                                  iterator_train__shuffle=True,
                                  train_split=None)

    elif args.model == 'neighbors':

        net = NeuralNetClassifier(models.MpraFullCNN,
                                  batch_size=256,
                                  optimizer=torch.optim.Adam,
                                  optimizer__weight_decay=1e-2,
                                  lr=5e-5,
                                  max_epochs=20,
                                  callbacks=[auc, apr],
                                  iterator_train__shuffle=True,
                                  train_split=None)

    # generate predictions
    torch.manual_seed(1000)
    net.fit(X, y)
    return net
Example #18
    def build_estimator(hyperparams, train_data, test=False):
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ("r2_score_valid", EpochScoring("r2", lower_is_better=False)),
            (
                "early_stopping",
                EarlyStopping(monitor="valid_loss", patience=5, lower_is_better=True),
            ),
            (
                "learning_rate_scheduler",
                LRScheduler(
                    policy=lr_scheduler.ReduceLROnPlateau,
                    monitor="valid_loss",
                    # The following kwargs are passed to the
                    # LR scheduler constructor
                    mode="min",
                    min_lr=1e-5,
                ),
            ),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True,  # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams["lr"],
            batch_size=hyperparams["batch_size"],
            module__in_features=in_features,
            module__n_layers=hyperparams["n_layers"],
            module__n_neuron_per_layer=hyperparams["n_neuron_per_layer"],
            module__activation=getattr(F, hyperparams["activation"]),
            module__p_dropout=hyperparams["p_dropout"],
            optimizer__momentum=hyperparams["momentum"],
            optimizer__weight_decay=hyperparams["weight_decay"],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4,
        )
Example #19
def fnp_init(x_train, y_train, short_term):
    # Early Stopping when crps does not improve
    # (fnp can get worse if trained too long)
    def scoring(model, X, y):
        y_pred = model.predict(X.X['XM'], samples=5)
        score = crps(y_pred[..., 0], np.sqrt(y_pred[..., 1]), y)
        return score

    scorer = EpochScoring(scoring, on_train=True, name='crps')
    es = EarlyStopping(monitor='crps', patience=100)

    if short_term:
        epochs = 300
        hs_enc = [24, 64]
        hs_dec = [32]
        dim_u = 2
        dim_z = 32
        fb_z = 1.0
    else:
        epochs = 300
        hs_enc = [132, 77]
        hs_dec = [50]
        dim_u = 9
        dim_z = 32
        fb_z = 1.0

    fnp = RegressionFNPSkorch(
        module=RegressionFNP,
        module__dim_x=x_train.shape[-1],
        module__dim_y=y_train.shape[-1],
        module__hidden_size_enc=hs_enc,
        module__hidden_size_dec=hs_dec,
        optimizer=torch.optim.Adam,
        device=device,
        seed=42,
        module__dim_u=dim_u,
        module__dim_z=dim_z,
        module__fb_z=fb_z,
        lr=0.001,
        reference_set_size_ratio=0.003,
        max_epochs=epochs,
        batch_size=1024,
        train_size=x_train.size,
        train_split=None,
        verbose=1,
        # callbacks=[scorer, es]
    )

    return fnp
Example #20
    def test_string_monitor_and_formatting(self, save_params_mock, net_cls,
                                           checkpoint_cls, data):
        def epoch_3_scorer(net, *_):
            return 1 if net.history[-1, 'epoch'] == 3 else 0

        from skorch.callbacks import EpochScoring
        scoring = EpochScoring(scoring=epoch_3_scorer,
                               on_train=True,
                               lower_is_better=False)

        sink = Mock()
        cb = checkpoint_cls(
            monitor='epoch_3_scorer_best',
            f_params='model_{last_epoch[epoch]}_{net.max_epochs}.pt',
            f_optimizer='optimizer_{last_epoch[epoch]}_{net.max_epochs}.pt',
            f_criterion='criterion_{last_epoch[epoch]}_{net.max_epochs}.pt',
            sink=sink)
        net = net_cls(callbacks=[('my_score', scoring), cb])
        net.fit(*data)

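        # Two checkpoint triggers (epochs 1 and 3) x four files each = 8 saves.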
        assert save_params_mock.call_count == 8
        assert cb.get_formatted_files(net) == {
            'f_params': 'model_3_10.pt',
            'f_optimizer': 'optimizer_3_10.pt',
            'f_criterion': 'criterion_3_10.pt',
            'f_history': 'history.json',
            'f_pickle': None
        }
        save_params_mock.assert_has_calls(
            [
                call(f_module='model_1_10.pt'),  # params is turned into module
                call(f_optimizer='optimizer_1_10.pt'),
                call(f_criterion='criterion_1_10.pt'),
                call(f_history='history.json'),
                call(f_module='model_3_10.pt'),  # params is turned into module
                call(f_optimizer='optimizer_3_10.pt'),
                call(f_criterion='criterion_3_10.pt'),
                call(f_history='history.json'),
            ],
            any_order=True,
        )
        assert sink.call_count == 2
        # The first epoch will always be saved. `epoch_3_scorer` returns 1 at
        # epoch 3, which will trigger another checkpoint. For all other epochs
        # `epoch_3_scorer` returns 0, which does not trigger a checkpoint.
        assert [True, False, True] + [False] * 7 == net.history[:, 'event_cp']
Example #21
    def test_load_initial_state_custom_scoring(
            self, checkpoint_cls, net_cls, loadinitstate_cls,
            data, tmpdir):
        def epoch_3_scorer(net, *_):
            return 1 if net.history[-1, 'epoch'] == 3 else 0

        from skorch.callbacks import EpochScoring
        scoring = EpochScoring(
            scoring=epoch_3_scorer, on_train=True, lower_is_better=False)

        skorch_dir = tmpdir.mkdir('skorch')
        f_params = skorch_dir.join(
            'model_epoch_{last_epoch[epoch]}.pt')
        f_optimizer = skorch_dir.join(
            'optimizer_epoch_{last_epoch[epoch]}.pt')
        f_history = skorch_dir.join(
            'history.json')

        cp = checkpoint_cls(
            monitor='epoch_3_scorer_best',
            f_params=str(f_params),
            f_optimizer=str(f_optimizer),
            f_history=str(f_history)
        )
        load_init_state = loadinitstate_cls(cp)
        net = net_cls(callbacks=[load_init_state, scoring, cp])

        net.fit(*data)

        assert skorch_dir.join('model_epoch_3.pt').exists()
        assert skorch_dir.join('optimizer_epoch_3.pt').exists()
        assert skorch_dir.join('history.json').exists()

        assert len(net.history) == 10
        del net

        new_net = net_cls(callbacks=[load_init_state, scoring, cp])
        new_net.fit(*data)

        # new_net starts from the best epoch of the first run;
        # the best epoch of the previous run was epoch 3 and
        # the second run goes through 10 more epochs, thus
        # 3 + 10 = 13
        assert len(new_net.history) == 13
        assert new_net.history[:, 'event_cp'] == [
            True, False, True] + [False] * 10
Example #22
    def build_estimator(hyperparams, train_data, test=False):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ('r2_score_valid', EpochScoring('r2',
                                            lower_is_better=False)),
            ('early_stopping', EarlyStopping(monitor='valid_loss',
                                             patience=5,
                                             lower_is_better=True)),
            ('learning_rate_scheduler', LRScheduler(policy=lr_scheduler.ReduceLROnPlateau,
                                                    monitor='valid_loss',
                                                    # The following kwargs are passed to the
                                                    # LR scheduler constructor
                                                    mode='min',
                                                    min_lr=1e-5
                                                    )),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True, # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams['lr'],
            batch_size=hyperparams['batch_size'],
            module__in_features=in_features,
            module__n_layers=hyperparams['n_layers'],
            module__n_neuron_per_layer=hyperparams['n_neuron_per_layer'],
            module__activation=getattr(F, hyperparams['activation']),
            module__p_dropout=hyperparams['p_dropout'],
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4
        )
Example #23
    def test_string_monitor_and_formatting(self, save_params_mock, net_cls,
                                           checkpoint_cls, data):
        def epoch_3_scorer(net, *_):
            return 1 if net.history[-1, 'epoch'] == 3 else 0

        from skorch.callbacks import EpochScoring
        scoring = EpochScoring(scoring=epoch_3_scorer, on_train=True)

        net = net_cls(callbacks=[
            ('my_score', scoring),
            checkpoint_cls(
                monitor='epoch_3_scorer',
                target='model_{last_epoch[epoch]}_{net.max_epochs}.pt'),
        ])
        net.fit(*data)

        assert save_params_mock.call_count == 1
        save_params_mock.assert_called_with('model_3_10.pt')
Example #24
    def _parse_callbacks(self, callbacks):
        callbacks_list = []
        if callbacks is not None:
            for callback in callbacks:
                if isinstance(callback, tuple):
                    callbacks_list.append(callback)
                else:
                    assert isinstance(callback, str)
                    scoring = get_scorer(callback)
                    scoring_name = scoring._score_func.__name__
                    assert scoring_name.endswith(
                        ('_score', '_error', '_deviance', '_loss'))
                    if (scoring_name.endswith('_score')
                            or callback.startswith('neg_')):
                        lower_is_better = False
                    else:
                        lower_is_better = True
                    train_name = f'train_{callback}'
                    valid_name = f'valid_{callback}'
                    if self.cropped:
                        # In case of cropped decoding we are using braindecode
                        # specific scoring created for cropped decoding
                        train_scoring = CroppedTrialEpochScoring(
                            callback,
                            lower_is_better,
                            on_train=True,
                            name=train_name)
                        valid_scoring = CroppedTrialEpochScoring(
                            callback,
                            lower_is_better,
                            on_train=False,
                            name=valid_name)
                    else:
                        train_scoring = PostEpochTrainScoring(callback,
                                                              lower_is_better,
                                                              name=train_name)
                        valid_scoring = EpochScoring(callback,
                                                     lower_is_better,
                                                     on_train=False,
                                                     name=valid_name)
                    callbacks_list.extend([(train_name, train_scoring),
                                           (valid_name, valid_scoring)])

        return callbacks_list
Example #25
    def test_get_params_works(self, net_cls, module_cls):
        from skorch.callbacks import EpochScoring

        net = net_cls(
            module_cls, callbacks=[('myscore', EpochScoring('myscore'))])

        params = net.get_params(deep=True)
        # test a couple of expected parameters
        assert 'verbose' in params
        assert 'module' in params
        assert 'callbacks' in params
        assert 'callbacks__print_log__sink' in params
        # not yet initialized
        assert 'callbacks__myscore__scoring' not in params

        net.initialize()
        params = net.get_params(deep=True)
        # now initialized
        assert 'callbacks__myscore__scoring' in params
Example #26
    def _default_callbacks(self):
        default_cb_list = [
            ('epoch_timer', EpochTimer()),
            ('train_loss',
             BatchScoring(train_loss_score,
                          name='train_loss',
                          on_train=True,
                          target_extractor=noop)),
            ('valid_loss',
             BatchScoring(valid_loss_score,
                          name='valid_loss',
                          target_extractor=noop)),
            ('valid_acc',
             EpochScoring(
                 'accuracy',
                 name='valid_acc',
                 lower_is_better=False,
             )),
            # ('checkpoint', Checkpoint(
            #     dirname=self.model_path)),
            # ('end_checkpoint', TrainEndCheckpoint(
            #     dirname=self.model_path)),
            ('report', ReportLog()),
            ('progressbar', ProgressBar())
        ]

        # if 'stop_patience' in self.hyperparamters.keys() and \
        #         self.hyperparamters['stop_patience']:
        #     earlystop_cb = ('earlystop',  EarlyStopping(
        #                     patience=self.patience,
        #                     threshold=1e-4))
        #     default_cb_list.append(earlystop_cb)
        #
        # if 'lr_step' in self.hyperparamters.keys() and \
        #         self.hyperparamters['lr_step']:
        #     lr_callback = ('lr_schedule', DecayLR(
        #                    self.hyperparamters['lr'],
        #                    self.hyperparamters['lr_step'],
        #                    gamma=0.5))
        #     default_cb_list.append(lr_callback)

        return default_cb_list
Example #27
def prepare_learnt_model(model_args, path_tfms, is_meta, verbose=2):
    """Model builder if learnt transforms are involved.

    The key difference (as explained in prepare_non_learnt_model) between this function and prepare_non_learnt_model
    is that the

    Args:
        model_args (dict): Experiment model args as defined in the main experiment function.
        path_tfms (Pipeline): An sklearn pipeline of path transformations to be applied before model training.
        is_meta (bool): Set True for a dyadic meta model.
        verbose (int): Output verbosity level.

    Returns:

    """
    # Initialise the signature string class.
    model_args['is_meta'] = is_meta
    module = SignatureStringModel(**model_args)

    model = NeuralNetClassifier(
        module=module,
        criterion=nn.BCEWithLogitsLoss if model_args['out_channels'] == 1 else nn.CrossEntropyLoss,
        batch_size=64,
        verbose=verbose,
        iterator_train__drop_last=True,
        callbacks=[
            ('scheduler', LRScheduler(policy='ReduceLROnPlateau')),
            ('val_stopping', EarlyStopping(monitor='valid_loss', patience=30)),
            ('checkpoint', CustomCheckpoint(monitor='valid_loss_best')),
            ('scorer', EpochScoring(custom_scorer, lower_is_better=False, name='true_acc'))
        ],
        train_split=CVSplit(cv=5, random_state=1, stratified=True),
        device=device if model_args['gpu'] else 'cpu',
    )
    pipeline = Pipeline([
        *path_tfms,
        ('classifier', model)
    ])
    return pipeline
Example #28
    def test_string_monitor_and_formatting(self, save_params_mock, net_cls,
                                           checkpoint_cls, data):
        def epoch_3_scorer(net, *_):
            return 1 if net.history[-1, 'epoch'] == 3 else 0

        from skorch.callbacks import EpochScoring
        scoring = EpochScoring(scoring=epoch_3_scorer, on_train=True)

        sink = Mock()
        net = net_cls(callbacks=[
            ('my_score', scoring),
            checkpoint_cls(
                monitor='epoch_3_scorer',
                f_params='model_{last_epoch[epoch]}_{net.max_epochs}.pt',
                sink=sink),
        ])
        net.fit(*data)

        assert save_params_mock.call_count == 1
        save_params_mock.assert_called_with('model_3_10.pt')
        assert sink.call_count == 1
        assert all((x is False) for x in net.history[:2, 'event_cp'])
        assert net.history[2, 'event_cp'] is True
        assert all((x is False) for x in net.history[3:, 'event_cp'])
Example #29
# EEGClassifier is the braindecode object that is responsible for managing
# the training of neural networks. It inherits from
# :class:`skorch.NeuralNetClassifier`, so the training logic is the same as in
# `Skorch <https://skorch.readthedocs.io/en/stable/>`__.
#

from skorch.helper import predefined_split
from skorch.callbacks import EpochScoring
from braindecode import EEGClassifier

lr = 1e-3
batch_size = 32
n_epochs = 3  # we use few epochs for speed, but more than one for plotting

train_bal_acc = EpochScoring(scoring='balanced_accuracy',
                             on_train=True,
                             name='train_bal_acc',
                             lower_is_better=False)
valid_bal_acc = EpochScoring(scoring='balanced_accuracy',
                             on_train=False,
                             name='valid_bal_acc',
                             lower_is_better=False)
callbacks = [('train_bal_acc', train_bal_acc),
             ('valid_bal_acc', valid_bal_acc)]

clf = EEGClassifier(
    model,
    criterion=torch.nn.CrossEntropyLoss,
    criterion__weight=torch.Tensor(class_weights).to(device),
    optimizer=torch.optim.Adam,
    iterator_train__shuffle=False,
    iterator_train__sampler=train_sampler,
Example #30
def main():
    parser = argparse.ArgumentParser(
        description='PyTorch RNN with variable-length numeric sequences wrapper'
    )
    parser.add_argument('--outcome_col_name', type=str, required=True)
    parser.add_argument('--train_csv_files', type=str, required=True)
    parser.add_argument('--test_csv_files', type=str, required=True)
    parser.add_argument('--data_dict_files', type=str, required=True)
    parser.add_argument('--batch_size',
                        type=int,
                        default=1024,
                        help='Number of sequences per minibatch')
    parser.add_argument('--epochs',
                        type=int,
                        default=50,
                        help='Number of epochs')
    parser.add_argument('--hidden_units',
                        type=int,
                        default=32,
                        help='Number of hidden units')
    parser.add_argument('--hidden_layers',
                        type=int,
                        default=1,
                        help='Number of hidden layers')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0005,
                        help='Learning rate for the optimizer')
    parser.add_argument('--dropout',
                        type=float,
                        default=0,
                        help='dropout for optimizer')
    parser.add_argument('--weight_decay',
                        type=float,
                        default=0.0001,
                        help='weight decay for optimizer')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--validation_size',
                        type=float,
                        default=0.15,
                        help='validation split size')
    parser.add_argument(
        '--is_data_simulated',
        type=bool,
        default=False,
        help='boolean to check if data is simulated or from mimic')
    parser.add_argument(
        '--simulated_data_dir',
        type=str,
        default='simulated_data/2-state/',
        help=
        'dir in which simulated data is saved. Must be provided if is_data_simulated = True'
    )
    parser.add_argument(
        '--output_dir',
        type=str,
        default=None,
        help=
        'directory where trained model and loss curves over epochs are saved')
    parser.add_argument(
        '--output_filename_prefix',
        type=str,
        default=None,
        help='prefix for the training history jsons and trained classifier')
    args = parser.parse_args()

    torch.manual_seed(args.seed)
    device = 'cpu'

    x_train_csv_filename, y_train_csv_filename = args.train_csv_files.split(
        ',')
    x_test_csv_filename, y_test_csv_filename = args.test_csv_files.split(',')
    x_dict, y_dict = args.data_dict_files.split(',')
    x_data_dict = load_data_dict_json(x_dict)

    # get the id and feature columns
    id_cols = parse_id_cols(x_data_dict)
    feature_cols = parse_feature_cols(x_data_dict)
    # extract data
    train_vitals = TidySequentialDataCSVLoader(
        x_csv_path=x_train_csv_filename,
        y_csv_path=y_train_csv_filename,
        x_col_names=feature_cols,
        idx_col_names=id_cols,
        y_col_name=args.outcome_col_name,
        y_label_type='per_sequence')

    test_vitals = TidySequentialDataCSVLoader(x_csv_path=x_test_csv_filename,
                                              y_csv_path=y_test_csv_filename,
                                              x_col_names=feature_cols,
                                              idx_col_names=id_cols,
                                              y_col_name=args.outcome_col_name,
                                              y_label_type='per_sequence')

    X_train, y_train = train_vitals.get_batch_data(batch_id=0)
    X_test, y_test = test_vitals.get_batch_data(batch_id=0)
    _, T, F = X_train.shape

    print('number of time points : %s\n number of features : %s\n' % (T, F))

    # set class weights as 1/(number of samples in class) for each class to handle class imbalance
    class_weights = torch.tensor(
        [1 / (y_train == 0).sum(), 1 / (y_train == 1).sum()]).double()

    # scale features
    #     X_train = standard_scaler_3d(X_train)
    #     X_test = standard_scaler_3d(X_test)

    # callback to compute gradient norm
    compute_grad_norm = ComputeGradientNorm(norm_type=2)

    # LSTM
    if args.output_filename_prefix is None:
        output_filename_prefix = (
            'hiddens=%s-layers=%s-lr=%s-dropout=%s-weight_decay=%s' %
            (args.hidden_units, args.hidden_layers, args.lr, args.dropout,
             args.weight_decay))
    else:
        output_filename_prefix = args.output_filename_prefix

    print('RNN parameters : ' + output_filename_prefix)
    # #     from IPython import embed; embed()
    rnn = RNNBinaryClassifier(
        max_epochs=50,
        batch_size=args.batch_size,
        device=device,
        lr=args.lr,
        callbacks=[
            EpochScoring('roc_auc',
                         lower_is_better=False,
                         on_train=True,
                         name='aucroc_score_train'),
            EpochScoring('roc_auc',
                         lower_is_better=False,
                         on_train=False,
                         name='aucroc_score_valid'),
            EarlyStopping(monitor='aucroc_score_valid',
                          patience=20,
                          threshold=0.002,
                          threshold_mode='rel',
                          lower_is_better=False),
            LRScheduler(policy=ReduceLROnPlateau,
                        mode='max',
                        monitor='aucroc_score_valid',
                        patience=10),
            compute_grad_norm,
            GradientNormClipping(gradient_clip_value=0.3,
                                 gradient_clip_norm_type=2),
            Checkpoint(monitor='aucroc_score_valid',
                       f_history=os.path.join(
                           args.output_dir, output_filename_prefix + '.json')),
            TrainEndCheckpoint(dirname=args.output_dir,
                               fn_prefix=output_filename_prefix),
        ],
        criterion=torch.nn.CrossEntropyLoss,
        criterion__weight=class_weights,
        train_split=skorch.dataset.CVSplit(args.validation_size),
        module__rnn_type='LSTM',
        module__n_layers=args.hidden_layers,
        module__n_hiddens=args.hidden_units,
        module__n_inputs=X_train.shape[-1],
        module__dropout_proba=args.dropout,
        optimizer=torch.optim.Adam,
        optimizer__weight_decay=args.weight_decay)

    clf = rnn.fit(X_train, y_train)
    y_pred_proba = clf.predict_proba(X_train)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_train_final = roc_auc_score(y_train, y_pred_proba_pos)
    print('AUROC with LSTM (Train) : %.2f' % auroc_train_final)

    y_pred_proba = clf.predict_proba(X_test)
    y_pred_proba_neg, y_pred_proba_pos = zip(*y_pred_proba)
    auroc_test_final = roc_auc_score(y_test, y_pred_proba_pos)
    print('AUROC with LSTM (Test) : %.2f' % auroc_test_final)