Example No. 1
def grid_search(train, grid_params, module, loss, outpath):
    net = NeuralNet(module,
                    loss,
                    optimizer=torch.optim.Adam,
                    max_epochs=MAX_EPOCHS,
                    device=DEVICE,
                    iterator_train__shuffle=False,
                    callbacks=[
                        EarlyStopping("valid_loss",
                                      lower_is_better=True,
                                      patience=PATIENCE)
                    ],
                    train_split=CVSplit(5, random_state=SEED))

    kf = KFold(n_splits=5, shuffle=True, random_state=SEED)  # random_state only takes effect with shuffle=True
    gs = GridSearchCV(net,
                      grid_params,
                      cv=kf,
                      refit=True,
                      scoring="neg_mean_squared_error",  # GridSearchCV needs an sklearn scorer; a torch loss instance is not valid here
                      verbose=10)

    grid_res = gs.fit(SliceDataset(train), train.y)

    df = pd.DataFrame(grid_res.cv_results_)
    # Adding some meta info
    df["param_feature_set"] = FLEVEL
    df["param_overlapping"] = WINDOW_OVERLAPPING
    df["param_context_len"] = CONTEXT_LEN
    df["param_discretized"] = DISCRETIZED
    df["param_module"] = module.__name__
    fname = f"gridsearch_{FLEVEL}_{CONTEXT_LEN}_{WINDOW_OVERLAPPING}_{DISCRETIZED}_{module.__name__}.pkl"
    df.to_pickle(outpath / fname)
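
For orientation, here is a minimal, self-contained sketch of the same pattern with a toy module and synthetic data (both invented for illustration): a skorch net is a scikit-learn estimator, so GridSearchCV works once the skorch Dataset is wrapped in a SliceDataset and the targets are passed separately, exactly as gs.fit(SliceDataset(train), train.y) does above.

import numpy as np
import torch
from sklearn.model_selection import GridSearchCV
from skorch import NeuralNetClassifier
from skorch.dataset import Dataset
from skorch.helper import SliceDataset

class ToyModule(torch.nn.Module):
    def __init__(self, hidden=10):
        super().__init__()
        self.fc1 = torch.nn.Linear(20, hidden)
        self.fc2 = torch.nn.Linear(hidden, 2)

    def forward(self, X):
        # log-probabilities, matching NeuralNetClassifier's default NLLLoss
        return torch.log_softmax(self.fc2(torch.relu(self.fc1(X))), dim=-1)

X = np.random.rand(100, 20).astype(np.float32)
y = np.random.randint(0, 2, 100)
ds = Dataset(X, y)

net = NeuralNetClassifier(ToyModule, max_epochs=3, verbose=0)
gs = GridSearchCV(net, {"module__hidden": [5, 10], "lr": [0.01, 0.1]},
                  cv=3, scoring="accuracy")
gs.fit(SliceDataset(ds), y)  # SliceDataset makes the Dataset sliceable for sklearn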
Example No. 2
    def load_extras(self):
        callbacks = []
        load_best_loss = train_end_load_best_loss(self.identifier)
        self.split = CVSplit(cv=self.val_split) if self.val_split != 0 else 0

        metrics = evaluator(
            self.val_split,
            self.config["optim"].get("metric", "mae"),
            self.identifier,
            self.forcetraining,
        )
        callbacks.extend(metrics)

        if not self.debug:
            callbacks.append(load_best_loss)
        scheduler = self.config["optim"].get("scheduler", None)
        if scheduler:
            scheduler = LRScheduler(scheduler,
                                    **self.config["optim"]["scheduler_params"])
            callbacks.append(scheduler)
        if self.config["cmd"].get("logger", False):
            from skorch.callbacks import WandbLogger

            callbacks.append(
                WandbLogger(
                    self.wandb_run,
                    save_model=False,
                    keys_ignored="dur",
                ))
        self.callbacks = callbacks
Example No. 3
 def test_cvsplit_deprecation(self):
     from skorch.dataset import CVSplit
     with pytest.warns(
             DeprecationWarning,
             match="is deprecated, use the new name ValidSplit instead",
     ):
         CVSplit()
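
This test pins down the rename: from skorch 0.11 on, CVSplit is deprecated in favour of ValidSplit (same behaviour, new name). A small compatibility shim, sketched here, keeps the older snippets on this page importable on either side of the rename:

try:
    from skorch.dataset import ValidSplit  # skorch >= 0.11
except ImportError:  # older skorch versions
    from skorch.dataset import CVSplit as ValidSplit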
Example No. 4
def test_regular(device):
    """
    Tests the LSTMTimeSeriesPredictor fitting
    """
    cuda_check(device)

    start = time.time()
    tsp = TimeSeriesPredictor(
        BenchmarkLSTM(hidden_dim=16),
        lr=1e-3,
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=EarlyStopping(patience=50),
        max_epochs=250,
        train_split=CVSplit(10),
        optimizer=Adam,
        device=device,
    )

    past_pattern_length = 24
    future_pattern_length = 12
    pattern_length = past_pattern_length + future_pattern_length
    fsd = FlightSeriesDataset(pattern_length,
                              past_pattern_length,
                              pattern_length,
                              stride=1)
    tsp.fit(fsd)
    end = time.time()
    elapsed = timedelta(seconds=end - start)
    print(f"Fitting in {device} time delta: {elapsed}")
    mean_r2_score = tsp.score(tsp.dataset)
    print(f"Achieved R2 score: {mean_r2_score}")
    assert mean_r2_score > -20
Example No. 5
 def validation_split(X, y):
     """ Custom split is used to apply augmentation to the training set only """
     splitter = CVSplit(cv=int(100 / early_stopping_val_percent),
                        random_state=RANDOM_STATE)
     dataset_train, dataset_valid = splitter(X)
     dataset_train = cls.AugmentedDataset(dataset_train)
     return dataset_train, dataset_valid
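
The int(100 / early_stopping_val_percent) expression turns a percentage into a fold count: with an integer cv, skorch reserves 1/cv of the data as the validation set (only the first fold is ever used), while a float holds out that fraction directly. A minimal sketch of the two equivalent spellings, per skorch's documented behaviour:

from skorch.dataset import CVSplit

# both reserve 20% of the data for validation
split_from_folds = CVSplit(cv=5)       # 1/5 of the samples become the valid set
split_from_fraction = CVSplit(cv=0.2)  # hold out a 20% fraction directly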
Example No. 6
    def build_estimator(hyperparams, train_data, test=False):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]
        n_classes = len(np.unique(y))
        n_samples = y.shape[0]
        bal_weights = torch.from_numpy(
            n_samples / (n_classes * np.bincount(y))).float().to(device)

        callbacks = [
            ('f1_score_valid',
             EpochScoring('f1' if n_classes == 2 else 'f1_macro',
                          name='valid_f1',
                          lower_is_better=False)),
            ('early_stopping',
             EarlyStopping(monitor='valid_loss',
                           patience=5,
                           lower_is_better=True)),
            (
                'learning_rate_scheduler',
                LRScheduler(
                    policy=lr_scheduler.ReduceLROnPlateau,
                    monitor='valid_loss',
                    # The following kwargs are passed to the
                    # lr scheduler constructor
                    mode='min',
                    min_lr=1e-5)),
        ]

        return NeuralNetClassifier(
            NNModule,
            criterion=nn.CrossEntropyLoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True,  # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5,
                                stratified=True,
                                random_state=RANDOM_STATE),
            lr=hyperparams['lr'],
            batch_size=hyperparams['batch_size'],
            module__in_features=in_features,
            module__n_classes=n_classes,
            module__n_layers=hyperparams['n_layers'],
            module__n_neuron_per_layer=hyperparams['n_neuron_per_layer'],
            module__activation=getattr(F, hyperparams['activation']),
            module__p_dropout=hyperparams['p_dropout'],
            criterion__weight=bal_weights
            if hyperparams['class_weight'] == 'balanced' else None,
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4)
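
The callback trio above recurs in several examples on this page. Sketched in isolation (same skorch APIs, illustrative values): score each epoch, stop once the monitored loss stalls, and anneal the learning rate on plateau.

from torch.optim import lr_scheduler
from skorch.callbacks import EarlyStopping, EpochScoring, LRScheduler

callbacks = [
    ('valid_f1', EpochScoring('f1_macro', name='valid_f1', lower_is_better=False)),
    ('early_stopping', EarlyStopping(monitor='valid_loss', patience=5)),
    ('lr_plateau', LRScheduler(policy=lr_scheduler.ReduceLROnPlateau,
                               monitor='valid_loss', mode='min', min_lr=1e-5)),
]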
Example No. 7
    def __init__(
            self,
            module,
            criterion,
            optimizer=torch.optim.SGD,
            lr=0.01,
            gradient_clip_value=None,
            gradient_clip_norm_type=2,
            max_epochs=10,
            batch_size=128,
            iterator_train=DataLoader,
            iterator_valid=DataLoader,
            dataset=Dataset,
            train_split=CVSplit(5),
            callbacks=None,
            cold_start=True,
            verbose=1,
            use_cuda=False,
            **kwargs
    ):
        self.module = module
        self.criterion = criterion
        self.optimizer = optimizer
        self.lr = lr
        self.max_epochs = max_epochs
        self.batch_size = batch_size
        self.iterator_train = iterator_train
        self.iterator_valid = iterator_valid
        self.dataset = dataset
        self.train_split = train_split
        self.callbacks = callbacks
        self.cold_start = cold_start
        self.verbose = verbose
        self.use_cuda = use_cuda
        self.gradient_clip_value = gradient_clip_value
        self.gradient_clip_norm_type = gradient_clip_norm_type

        history = kwargs.pop('history', None)
        initialized = kwargs.pop('initialized_', False)

        # catch arguments that seem to not belong anywhere
        unexpected_kwargs = []
        for key in kwargs:
            if key.endswith('_'):
                continue
            if any(key.startswith(p) for p in self.prefixes_):
                continue
            unexpected_kwargs.append(key)
        if unexpected_kwargs:
            msg = ("__init__() got unexpected argument(s) {}."
                   "Either you made a typo, or you added new arguments "
                   "in a subclass; if that is the case, the subclass "
                   "should deal with the new arguments explicitely.")
            raise TypeError(msg.format(', '.join(unexpected_kwargs)))
        vars(self).update(kwargs)

        self.history = history
        self.initialized_ = initialized
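
This guard is the one that ships in skorch's NeuralNet, so its effect is easy to demonstrate: a misspelled keyword fails fast at construction time, while anything carrying a known prefix such as module__ passes through. A quick sketch with a dummy module (invented for illustration):

import torch.nn as nn
from skorch import NeuralNetClassifier

class M(nn.Module):
    def forward(self, X):
        return X

try:
    NeuralNetClassifier(M, optimzer=None)  # note the typo in 'optimzer'
except TypeError as err:
    print(err)  # __init__() got unexpected argument(s) optimzer. ...

NeuralNetClassifier(M, module__foo=1)  # 'module' is a known prefix, accepted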
Example No. 8
def run_100(task, task_df, args, threshold):
    reduce_lr = LRScheduler(
        policy='ReduceLROnPlateau',
        mode='min',
        factor=0.5,
        patience=1,
    )

    seeds = list(range(args.start_seed, args.start_seed + 100))
    for seed in tqdm(seeds, desc=f'{task} Runs'):
        logger.info(f"Spliting with seed {seed}")
        checkpoint = Checkpoint(dirname=args.modeldir / f'{task}_seed_{seed}')
        df = set_group_splits(task_df.copy(), group_col='hadm_id', seed=seed)
        vectorizer = TfidfVectorizer(sublinear_tf=True,
                                     ngram_range=(1, 2),
                                     binary=True,
                                     max_features=60_000)

        x_train = vectorizer.fit_transform(
            df.loc[(df['split'] == 'train')]['processed_note']).astype(
                np.float32)
        x_test = vectorizer.transform(
            df.loc[(df['split'] == 'test')]['processed_note']).astype(
                np.float32)

        x_train = np.asarray(x_train.todense())
        x_test = np.asarray(x_test.todense())
        vocab_sz = len(vectorizer.vocabulary_)

        y_train = df.loc[(df['split'] == 'train')][f'{task}_label'].to_numpy()
        y_test = df.loc[(df['split'] == 'test')][f'{task}_label'].to_numpy()

        clf = MLPModule(input_units=vocab_sz,
                        output_units=1,
                        hidden_units=args.hidden_dim,
                        num_hidden=1,
                        dropout=args.dropout_p,
                        squeeze_output=True)

        net = NeuralNetBinaryClassifier(
            clf,
            max_epochs=args.max_epochs,
            lr=args.lr,
            device=args.device,
            optimizer=optim.Adam,
            optimizer__weight_decay=args.wd,
            batch_size=args.batch_size,
            verbose=1,
            callbacks=[EarlyStopping, ProgressBar, checkpoint, reduce_lr],
            train_split=CVSplit(cv=0.15, stratified=True),
            iterator_train__shuffle=True,
            threshold=threshold,
        )
        net.set_params(callbacks__valid_acc=None)
        net.fit(x_train, y_train.astype(np.float32))
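
The net.set_params(callbacks__valid_acc=None) call above switches off skorch's default validation-accuracy callback; setting a named callback to None is the documented way to disable it, and the same parameter can be given at construction time. A runnable sketch with a toy module and synthetic data (invented for illustration):

import numpy as np
import torch.nn as nn
from skorch import NeuralNetBinaryClassifier

X = np.random.rand(32, 5).astype(np.float32)
y = np.random.randint(0, 2, 32).astype(np.float32)

net = NeuralNetBinaryClassifier(
    nn.Sequential(nn.Linear(5, 1), nn.Flatten(start_dim=0)),  # logits of shape (n,)
    callbacks__valid_acc=None,  # same effect as the set_params call above
    max_epochs=1,
    verbose=0,
)
net.fit(X, y)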
Example No. 9
 def __init__(self,
              module,
              *args,
              criterion=torch.nn.NLLLoss,
              train_split=CVSplit(5, stratified=True),
              **kwargs):
     super(NeuralNetClassifier, self).__init__(module,
                                               *args,
                                               criterion=criterion,
                                               train_split=train_split,
                                               **kwargs)
Example No. 10
    def test_net_input_is_scoring_input(
            self, net_cls, module_cls, scoring_cls, data,
    ):
        # Make sure that whatever data type is put in the network is
        # received at the scoring side as well. For the caching case
        # we only receive datasets.
        import skorch
        from skorch.dataset import CVSplit
        import torch.utils.data.dataset
        from torch.utils.data.dataset import Subset

        class MyTorchDataset(torch.utils.data.dataset.TensorDataset):
            def __init__(self, X, y):
                super().__init__(
                    skorch.utils.to_tensor(X.reshape(-1, 1), device='cpu'),
                    skorch.utils.to_tensor(y, device='cpu'))

        class MySkorchDataset(skorch.dataset.Dataset):
            pass

        rawsplit = lambda ds: (ds, ds)
        cvsplit = CVSplit(2, random_state=0)

        def split_ignore_y(ds, y):
            return rawsplit(ds)

        table = [
            # Test a split where type(input) == type(output) is guaranteed
            (data, split_ignore_y, np.ndarray, False),
            (data, split_ignore_y, skorch.dataset.Dataset, True),
            ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, False),
            ((MyTorchDataset(*data), None), rawsplit, MyTorchDataset, True),
            ((MySkorchDataset(*data), None), rawsplit, np.ndarray, False),
            ((MySkorchDataset(*data), None), rawsplit, MySkorchDataset, True),

            # Test a split that splits datasets using torch Subset
            (data, cvsplit, np.ndarray, False),
            (data, cvsplit, Subset, True),
            ((MyTorchDataset(*data), None), cvsplit, Subset, False),
            ((MyTorchDataset(*data), None), cvsplit, Subset, True),
            ((MySkorchDataset(*data), None), cvsplit, np.ndarray, False),
            ((MySkorchDataset(*data), None), cvsplit, Subset, True),
        ]

        for input_data, train_split, expected_type, caching in table:
            self.net_input_is_scoring_input(
                net_cls,
                module_cls,
                scoring_cls,
                input_data,
                train_split,
                expected_type,
                caching)
Example No. 11
 def __init__(self,
              module,
              *args,
              criterion=torch.nn.BCEWithLogitsLoss,
              train_split=CVSplit(5, stratified=True),
              threshold=0.5,
              **kwargs):
     super().__init__(module,
                      criterion=criterion,
                      train_split=train_split,
                      *args,
                      **kwargs)
     self.threshold = threshold
Example No. 12
    def test_fit_with_dataset_without_explicit_y(
            self, net_cls, module_cls, dataset_cls, data):
        from skorch.dataset import CVSplit

        net = net_cls(
            module_cls,
            max_epochs=1,
            train_split=CVSplit(stratified=False),
        )
        ds = dataset_cls(*data)
        net.fit(ds, None)  # does not raise
        for key in ('train_loss', 'valid_loss', 'valid_acc'):
            assert key in net.history[-1]
Example No. 13
    def test_fit_with_dataset_stratified_without_explicit_y_raises(
            self, net_cls, module_cls, dataset_cls, data):
        from skorch.dataset import CVSplit

        net = net_cls(
            module_cls,
            train_split=CVSplit(stratified=True),
        )
        ds = dataset_cls(*data)
        with pytest.raises(ValueError) as exc:
            net.fit(ds, None)

        msg = "Stratified CV requires explicitely passing a suitable y."
        assert exc.value.args[0] == msg
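
Together with the previous example this pins the rule down: y may be omitted only for a non-stratified split; with stratified=True the labels must be passed explicitly so the splitter can see them. A self-contained sketch of the passing case (toy module and data invented for illustration):

import numpy as np
import torch.nn as nn
from skorch import NeuralNetClassifier
from skorch.dataset import CVSplit

X = np.random.rand(40, 4).astype(np.float32)
y = np.random.randint(0, 2, 40)

net = NeuralNetClassifier(
    nn.Sequential(nn.Linear(4, 2), nn.LogSoftmax(dim=-1)),
    train_split=CVSplit(stratified=True),
    max_epochs=1,
    verbose=0,
)
net.fit(X, y)  # y is available, so the stratified split succeeds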
Example No. 14
    def build_estimator(hyperparams, train_data, test=False):
        device = "cuda" if torch.cuda.is_available() else "cpu"

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ("r2_score_valid", EpochScoring("r2", lower_is_better=False)),
            (
                "early_stopping",
                EarlyStopping(monitor="valid_loss", patience=5, lower_is_better=True),
            ),
            (
                "learning_rate_scheduler",
                LRScheduler(
                    policy=lr_scheduler.ReduceLROnPlateau,
                    monitor="valid_loss",
                    # The following kwargs are passed to the
                    # lr scheduler constructor
                    mode="min",
                    min_lr=1e-5,
                ),
            ),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True,  # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams["lr"],
            batch_size=hyperparams["batch_size"],
            module__in_features=in_features,
            module__n_layers=hyperparams["n_layers"],
            module__n_neuron_per_layer=hyperparams["n_neuron_per_layer"],
            module__activation=getattr(F, hyperparams["activation"]),
            module__p_dropout=hyperparams["p_dropout"],
            optimizer__momentum=hyperparams["momentum"],
            optimizer__weight_decay=hyperparams["weight_decay"],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4,
        )
Example No. 15
 def _DDTClassifier(self):
     if self.dataset == 'ttt':
         depth = 13
         enc = OneHotEncoder(categories='auto', sparse=False)
         enc = enc.fit(self.x)
         self.feat_lbl = enc.get_feature_names(self.feat_lbl).tolist()
         self.x = enc.transform(self.x).astype(np.float32)
     elif self.dataset == 'cancer':
         depth = 8
     elif self.dataset == 'cesarean':
         depth = 11
         enc = OneHotEncoder(
             categories='auto',
             sparse=False,
         )
         enc = enc.fit(self.x)
         self.feat_lbl = enc.get_feature_names(self.feat_lbl).tolist()
         self.x = enc.transform(self.x).astype(np.float32)
     else:
         raise ValueError('[ERROR] Invalid Dataset.')
     n_output = len(self.target_lbl)
     n_input = self.x.shape[1]
     module = lambda: FDDTN(depth=depth,
                            n_input=n_input,
                            n_output=n_output,
                            continuous=False,
                            labels=self.feat_lbl,
                            param_initer=lambda *x: 0.5 * torch.ones(*x),
                            action_labels=self.target_lbl)
     NeuralNetClassifier.train_step_single = train_step_single_monkey_patch
     NeuralNetClassifier.validation_step = validation_step_monkey_patch
     NeuralNetClassifier._default_callbacks = property(
         callbacks_monkey_patch)
     self.classifier = NeuralNetClassifier(
         module=module,
         criterion=nn.CrossEntropyLoss,
         optimizer=Adam,
         train_split=CVSplit(cv=0.3),
         # callbacks=[('EarlyStopping', EarlyStopping(patience=20,
         #                                            threshold=1e-6,
         #                                            threshold_mode='abs'))],
         lr=1e-2,
         max_epochs=600,
         batch_size=256,
         device='cuda')
     self.classifier.encoder = self.enc
     self._reset_classifier()
     return self.classifier
Example No. 16
    def __call__(self,
                 dataset: Union[Dataset, np.ndarray],
                 y: Union[torch.Tensor, np.ndarray, None] = None):

        valid = getattr(dataset, 'valid', None)

        if valid is not None:
            return dataset, valid

        if isinstance(self.split, CVSplit):
            train, valid = self.split(dataset, y)

        else:
            split = CVSplit(self.split)
            train, valid = split(dataset, y)

        return train, valid
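
The wrapper above relies on a CVSplit instance being callable: given a dataset (and optionally y) it returns a (train, valid) pair of torch Subsets. A minimal sketch:

import numpy as np
from skorch.dataset import CVSplit, Dataset

ds = Dataset(np.arange(10, dtype=np.float32).reshape(-1, 1))
train_ds, valid_ds = CVSplit(cv=5)(ds)
print(len(train_ds), len(valid_ds))  # 8 2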
Example No. 17
 def run(self):
     data = self.load('data')
     sc = self.load('sc')
     Y = data['author'].values
     Y = Y.astype('float32')
     X = data.drop(columns=['author']).values
     X = sc.transform(X)
     global INPUT_LEN
     INPUT_LEN = len(X[0])
     model = NeuralNetBinaryClassifier(NN,
                                       max_epochs=100,
                                       iterator_train__shuffle=True,
                                       train_split=CVSplit(cv=10,
                                                           stratified=True,
                                                           random_state=0))
     model.fit(X, Y)
     self.dump(model)
Example No. 18
    def build_estimator(hyperparams, train_data, test=False):
        device = 'cuda' if torch.cuda.is_available() else 'cpu'

        # Extract info from training data
        X, y, *_ = train_data
        in_features = X.shape[1]

        callbacks = [
            ('r2_score_valid', EpochScoring('r2',
                                            lower_is_better=False)),
            ('early_stopping', EarlyStopping(monitor='valid_loss',
                                             patience=5,
                                             lower_is_better=True)),
            ('learning_rate_scheduler', LRScheduler(policy=lr_scheduler.ReduceLROnPlateau,
                                                    monitor='valid_loss',
                                                    # The following kwargs are passed to the
                                                    # lr scheduler constructor
                                                    mode='min',
                                                    min_lr=1e-5
                                                    )),
        ]

        return NeuralNetRegressor(
            NNModule,
            criterion=nn.MSELoss,
            optimizer=torch.optim.SGD,
            max_epochs=300,
            iterator_train__shuffle=True, # Shuffle training data on each epoch
            callbacks=callbacks,
            device=device,
            train_split=CVSplit(cv=5, random_state=RANDOM_STATE),
            lr=hyperparams['lr'],
            batch_size=hyperparams['batch_size'],
            module__in_features=in_features,
            module__n_layers=hyperparams['n_layers'],
            module__n_neuron_per_layer=hyperparams['n_neuron_per_layer'],
            module__activation=getattr(F, hyperparams['activation']),
            module__p_dropout=hyperparams['p_dropout'],
            optimizer__momentum=hyperparams['momentum'],
            optimizer__weight_decay=hyperparams['weight_decay'],
            optimizer__nesterov=True,
            verbose=3,
            iterator_train__num_workers=4,
            iterator_valid__num_workers=4
        )
Example No. 19
    def __init__(self,
                 module,
                 critic,
                 *args,
                 train_split=CVSplit(10),
                 train_generator_every=1,
                 critic_optimizer=torch.optim.SGD,
                 **kwargs):

        self.critic = critic
        self.critic_optimizer = critic_optimizer
        self.train_generator_every = train_generator_every

        super().__init__(module,
                         *args,
                         criterion=None,
                         train_split=train_split,
                         **kwargs)
Example No. 20
def test_transformer_tsp_multisamples(device):
    '''multivariate test'''
    cuda_check(device)

    start = time.time()
    tsp = TimeSeriesPredictor(
        Transformer(d_model=12),
        lr=1e-5,
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=EarlyStopping(patience=100),
        max_epochs=500,
        train_split=CVSplit(10),
        optimizer=Adam,
        device=device,
    )

    past_pattern_length = 24
    future_pattern_length = 12
    pattern_length = past_pattern_length + future_pattern_length
    # pylint: disable-next=line-too-long
    fsd = FlightSeriesDataset(pattern_length,
                              future_pattern_length,
                              pattern_length,
                              stride=1,
                              generate_test_dataset=True)
    tsp.fit(fsd)
    end = time.time()
    elapsed = timedelta(seconds=end - start)
    print(f"Fitting in {device} time delta: {elapsed}")

    mean_r2_score = tsp.score(tsp.dataset)
    assert mean_r2_score > -0.5

    netout = tsp.predict(fsd.test.x)

    idx = np.random.randint(0, len(fsd.test.x))

    y_true = fsd.test.y[idx, :, :]
    y_hat = netout[idx, :, :]
    r2s = r2_score(y_true, y_hat)
    assert r2s > -1
    print(f"Final R2 score: {r2s}")
Example No. 21
    def __init__(self,
                 module,
                 critic,
                 *args,
                 criterion=nn.BCELoss,
                 train_split=CVSplit(10),
                 train_generator_every=1,
                 critic_optimizer=torch.optim.SGD,
                 **kwargs):

        super().__init__(module,
                         *args,
                         critic=critic,
                         critic_optimizer=critic_optimizer,
                         train_split=train_split,
                         train_generator_every=train_generator_every,
                         **kwargs)

        self.criterion = criterion
Example No. 22
    def final_fit(self, dataset, best_params=None, train_all=False):
        if best_params is None:
            best_params = self.model.get_params()
        # Once the best parameters are found, the best model is trained on the whole training set.
        if self.model.train_split is not None and isinstance(self.model.train_split, CVSplit):
            best_params.update({"train_split": None if train_all else CVSplit(5)})
        # Callbacks
        train_acc = BatchScoring(scoring='accuracy', on_train=True,
                                 name='train_acc', lower_is_better=False)
        valid_acc = BatchScoring(scoring='accuracy', on_train=False,
                                 name='valid_acc', lower_is_better=False)
        best_params.update({"callbacks": [train_acc, valid_acc]})

        self.best_model.set_params(**best_params)
        self.best_model.fit(dataset, None)
        # saving
        with open(path.join(ARTIFACTS_DIR, self.name + '.pkl'), 'wb') as f:
            pickle.dump(self, f)
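
final_fit works because train_split is an ordinary skorch parameter: a set_params call is all it takes to refit the winning configuration with a fresh split or on all of the data. A self-contained sketch of that toggle (toy module and data invented for illustration):

import numpy as np
import torch.nn as nn
from skorch import NeuralNetClassifier
from skorch.dataset import CVSplit

X = np.random.rand(40, 4).astype(np.float32)
y = np.random.randint(0, 2, 40)

net = NeuralNetClassifier(
    nn.Sequential(nn.Linear(4, 2), nn.LogSoftmax(dim=-1)),
    train_split=CVSplit(5),
    max_epochs=1,
    verbose=0,
)
net.fit(X, y)                     # 1/5 of the data held out for valid_loss
net.set_params(train_split=None)  # as in final_fit with train_all=True
net.fit(X, y)                     # refit on all of the data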
Example No. 23
    def __init__(self,
                 module,
                 *args,
                 valid_transform=None,
                 criterion=torch.nn.CrossEntropyLoss,
                 train_split=CVSplit(5, stratified=False),
                 classes=None,
                 **kwargs):
        super().__init__(module,
                         *args,
                         criterion=criterion,
                         train_split=train_split,
                         classes=classes,
                         **kwargs)

        if valid_transform is None:
            self.valid_transform = Compose(
                [ToTensor(),
                 Normalize((MEAN_PIXEL, ), (STD_PIXEL, ))])
        else:
            self.valid_transform = valid_transform
Example No. 24
    def __init__(self,
                 module: nn.Module,
                 model_name: str,
                 sub_folder: str,
                 hyperparamters: dict,
                 optimizer,
                 gesture_list: list,
                 callbacks: list,
                 train_split=CVSplit(cv=0.1, random_state=0)):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        super(SiameseEMG, self).__init__(
            module,
            criterion=nn.TripletMarginLoss,
            optimizer=optimizer,
            lr=hyperparamters['lr'],
            max_epochs=hyperparamters['epoch'],
            train_split=train_split,
            callbacks=callbacks,
            device=device,
            iterator_train__shuffle=True,
            iterator_train__num_workers=4,
            iterator_train__batch_size=hyperparamters['train_batch_size'],
            iterator_valid__shuffle=False,
            iterator_valid__num_workers=4,
            iterator_valid__batch_size=hyperparamters['valid_batch_size'])
        self.model_name = model_name
        self.hyperparamters = hyperparamters
        self.distance = PairwiseDistance().to(self.device)
        self.model_path = generate_folder('checkpoints',
                                          model_name,
                                          sub_folder=sub_folder)

        self.clf = KNeighborsClassifier(n_neighbors=5)
        self.clf_fit = False
        self.anchors = []
        self.labels = []

        print('Building a new model; initializing parameters of {}'.format(model_name))
        param_dict = self.load_pretrained("pretrained_end_params.pt")
        self.module.load_state_dict(param_dict)
Example No. 25
def prepare_learnt_model(model_args, path_tfms, is_meta, verbose=2):
    """Model builder if learnt transforms are involved.

    The key difference between this function and prepare_non_learnt_model (as explained
    there) is that the transforms handled here are learnt, i.e. trained jointly with the model.

    Args:
        model_args (dict): Experiment model args as defined in the main experiment function.
        path_tfms (Pipeline): An sklearn pipeline of path transformations to be applied before model training.
        is_meta (bool): Set True for a dyadic meta model.
        verbose (int): Output verbosity level.

    Returns:
        Pipeline: An sklearn pipeline applying the path transforms followed by the classifier.
    """
    # Initialise the signature string class.
    model_args['is_meta'] = is_meta
    module = SignatureStringModel(**model_args)

    model = NeuralNetClassifier(
        module=module,
        criterion=nn.BCEWithLogitsLoss if model_args['out_channels'] == 1 else nn.CrossEntropyLoss,
        batch_size=64,
        verbose=verbose,
        iterator_train__drop_last=True,
        callbacks=[
            ('scheduler', LRScheduler(policy='ReduceLROnPlateau')),
            ('val_stopping', EarlyStopping(monitor='valid_loss', patience=30)),
            ('checkpoint', CustomCheckpoint(monitor='valid_loss_best')),
            ('scorer', EpochScoring(custom_scorer, lower_is_better=False, name='true_acc'))
        ],
        train_split=CVSplit(cv=5, random_state=1, stratified=True),
        device=device if model_args['gpu'] else 'cpu',
    )
    pipeline = Pipeline([
        *path_tfms,
        ('classifier', model)
    ])
    return pipeline
Example No. 26
def test_main(stride, test_main_context):
    context = test_main_context(stride)
    past_pattern_length = context['past_pattern_length']
    future_pattern_length = context['future_pattern_length']
    pattern_length = past_pattern_length + future_pattern_length
    tsp = TimeSeriesPredictor(
        BenchmarkLSTM(
            initial_forget_gate_bias=1,
            hidden_dim=7,
            num_layers=1,
        ),
        lr=context['lr'],
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=EarlyStopping(patience=100),
        max_epochs=500,
        train_split=CVSplit(context['n_cv_splits']),
        optimizer=Adam,
    )
    fsd = FlightSeriesDataset(pattern_length,
                              future_pattern_length,
                              context['except_last_n'],
                              stride=stride,
                              generate_test_dataset=True)
    tsp.fit(fsd)

    mean_r2_score = tsp.score(tsp.dataset)
    assert mean_r2_score > context['mean_r2_score']

    netout = tsp.predict(fsd.test.x)

    idx = np.random.randint(0, len(fsd.test.x))

    y_true = fsd.test.y[idx, :, :]
    y_hat = netout[idx, :, :]
    r2s = r2_score(y_true, y_hat)
    print("Final R2 score: {}".format(r2s))
    assert r2s > context['final_r2_score']
Example No. 27
def configureAnchor(outpath, checkpoint: Path = None):
    batch_size = 4096
    lr = 1e-4
    model_args = {
        "module__pool": "last",
        "module__input_size": 19,
        "module__rnn_size": 128,
        "module__rnn_layers": 3,
        "module__latent_size": 128
    }

    dist_plot = Callbacks.DistPlot(outpath)
    loss_plot = Callbacks.EpochPlot(outpath, ["train_loss", "valid_loss"])

    net = ad.WindowedAnomalyDetector(tripletloss.GruLinear,
                                     tripletloss.ContextualCoherency,
                                     optimizer=torch.optim.Adam,
                                     iterator_train__shuffle=False,
                                     lr=lr,
                                     batch_size=batch_size,
                                     max_epochs=MAX_EPOCHS,
                                     **model_args,
                                     device=DEVICE,
                                     verbose=1,
                                     train_split=CVSplit(5, random_state=SEED),
                                     callbacks=[
                                         dist_plot, loss_plot,
                                         EarlyStopping("valid_loss",
                                                       lower_is_better=True,
                                                       patience=PATIENCE)
                                     ])
    if checkpoint is not None:
        net.initialize_context(int(TL_CONTEXT_LEN / 2))
        net.initialize()
        state_dict = torch.load(str(checkpoint),
                                map_location=torch.device("cpu"))
        net.module_.load_state_dict(state_dict)

    return net
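
Both this function and configureSeq2Seq below load checkpoints the same way: net.module_ only exists after initialize(), so the state dict is loaded afterwards. A stripped-down sketch of that pattern (toy module and file name invented for illustration):

import torch
import torch.nn as nn
from skorch import NeuralNetRegressor

net = NeuralNetRegressor(nn.Sequential(nn.Linear(3, 1)))
net.initialize()  # builds net.module_ without training
torch.save(net.module_.state_dict(), "ckpt.pt")  # stand-in for a real checkpoint
net.module_.load_state_dict(
    torch.load("ckpt.pt", map_location=torch.device("cpu")))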
Example No. 28
def configureSeq2Seq(outpath, checkpoint: Path = None):
    batch_size = 64
    lr = 1e-4
    model_args = {
        "module__pool": "mean",
        "module__input_size": 19,
        "module__teacher_forcing_ratio": 7.,
        "module__rnn_layers": 1,
        "module__latent_size": 128
    }

    loss_plot = Callbacks.EpochPlot(outpath, ["train_loss", "valid_loss"])

    net = ad.WindowedAnomalyDetector(autoencoder.Seq2Seq,
                                     autoencoder.ReconstructionError,
                                     optimizer=torch.optim.Adam,
                                     iterator_train__shuffle=False,
                                     lr=lr,
                                     batch_size=batch_size,
                                     max_epochs=MAX_EPOCHS,
                                     **model_args,
                                     device=DEVICE,
                                     verbose=1,
                                     train_split=CVSplit(5, random_state=SEED),
                                     callbacks=[
                                         loss_plot,
                                         EarlyStopping("valid_loss",
                                                       lower_is_better=True,
                                                       patience=PATIENCE)
                                     ])
    if checkpoint is not None:
        net.initialize_context(SEQ2SEQ_CONTEXT_LEN)
        net.initialize()
        state_dict = torch.load(str(checkpoint),
                                map_location=torch.device("cpu"))
        net.module_.load_state_dict(state_dict)

    return net
Example No. 29
    def __init__(self,
                 module,
                 alpha=1,
                 regularizer='none',
                 criterion=torch.nn.NLLLoss,
                 train_split=CVSplit(5, stratified=True),
                 classes=None,
                 *args,
                 **kwargs):
        self.alpha = alpha
        self.regularizer = regularizer

        # 'regularizer' and 'alpha' are consumed above; drop any duplicates
        # that may still be present in kwargs
        kwargs.pop('regularizer', None)
        kwargs.pop('alpha', None)

        super().__init__(module,
                         *args,
                         criterion=criterion,
                         train_split=train_split,
                         classes=classes,
                         **kwargs)
Example No. 30
 def __init__(
     self,
     module: nn.Module,
     model_name: str,
     sub_folder: str,
     hyperparamters: dict,
     optimizer,
     gesture_list: list,  # all gestures index
     callbacks: list,
     # train_new_model=True,
     train_split=CVSplit(cv=0.1, random_state=0)):
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
     super(EMGClassifier, self).__init__(
         module,
         criterion=nn.CrossEntropyLoss,
         optimizer=optimizer,
         lr=hyperparamters['lr'],
         max_epochs=hyperparamters['epoch'],
         train_split=train_split,
         callbacks=callbacks,
         device=device,
         iterator_train__shuffle=True,
         iterator_train__num_workers=4,
         iterator_train__batch_size=hyperparamters['train_batch_size'],
         iterator_valid__shuffle=False,
         iterator_valid__num_workers=4,
         iterator_valid__batch_size=hyperparamters['valid_batch_size'])
     self.model_name = model_name
     self.hyperparamters = hyperparamters
     # self.extend_scale = dataset.scale
     self._gesture_mapping = None
     self._all_gestures = gesture_list
     self.module.apply(init_parameters)
     self.model_trained = False
     self.model_path = generate_folder('checkpoints',
                                       model_name,
                                       sub_folder=sub_folder)