Example No. 1
    def fit(self, X, y=None, **fit_params):
        """Fit the estimator to data.

        This derives the number of object features from the data and then
        delegates to ``skorch.NeuralNet.fit``. See the documentation of that
        method for more details.

        Parameters
        ----------
        X : input data
            May take various forms, such as numpy arrays or torch datasets. See
            the documentation of ``skorch.NeuralNet.fit`` for more details.

        y : target data
            May take the same forms as ``X``. This is optional since the target
            data may already be included in the data structure that is passed
            as ``X``. See the documentation of ``skorch.NeuralNet.fit`` for
            more details.

        **fit_params : dict
            Additional fit parameters. See the documentation of
            ``skorch.NeuralNet.fit`` for more details.
        """
        dataset = self.get_dataset(X, y)
        (_n_objects, self.n_features_) = dataset[0][0].shape
        NeuralNet.fit(self, X=dataset, y=None, **fit_params)
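A minimal usage sketch of this fit override; the subclass name MyRanker and the data shapes are illustrative assumptions, not part of the source:

import numpy as np

# Assumed: MyRanker subclasses the estimator above, and its get_dataset
# yields samples shaped (n_objects, n_features).
X = np.random.rand(100, 5, 3).astype(np.float32)  # 100 samples, 5 objects, 3 features
y = np.argsort(np.random.rand(100, 5), axis=1)    # one ranking per sample

ranker = MyRanker()
ranker.fit(X, y)
print(ranker.n_features_)  # -> 3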
Example No. 2
def test_loss_lowers_on_each_epoch():
    torch.manual_seed(SEED)
    num_classes = 5
    num_features = 5
    size = 200
    y = torch.randint(0, num_classes, (size, 1), dtype=torch.long)
    X = torch.rand((size, num_features))

    predictor = nn.Sequential(nn.Linear(num_features, num_features), nn.ReLU(),
                              nn.Linear(num_features, 1))

    skorch_model = NeuralNet(
        module=OrdinalLogisticModel,
        module__predictor=predictor,
        module__num_classes=num_classes,
        criterion=CumulativeLinkLoss,
        max_epochs=10,
        optimizer=torch.optim.Adam,
        lr=0.01,
        train_split=None,
        callbacks=[
            ('ascension', AscensionCallback()),
        ],
    )

    skorch_model.fit(X, y)
    losses = [epoch['train_loss'] for epoch in skorch_model.history]
    for idx, loss in enumerate(losses[:-1]):
        # Next epoch's loss is less than this epoch's loss.
        assert losses[idx + 1] < loss, 'Loss lowers on each epoch'
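As an aside, skorch's History object supports slice syntax, so the loss extraction and the epoch-by-epoch check above can be written more compactly; a sketch using the same skorch_model:

# Equivalent access via History slicing:
losses = skorch_model.history[:, 'train_loss']

# Strict monotonic decrease in one assertion:
assert all(b < a for a, b in zip(losses, losses[1:])), \
    'Loss lowers on each epoch'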
Example No. 3
def train(data_folder: str, out_model: str):
    out_model = Path(out_model)
    out_model.mkdir()

    data_paths = list(Path(data_folder).rglob("*.npy"))
    train_paths, valid_paths = train_test_split(data_paths, train_size=0.7)

    train_dataset = LibriSpeechDataset(
        train_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    valid_dataset = LibriSpeechDataset(
        valid_paths,
        Path(data_folder).parent / "SPEAKERS.TXT",
        Compose([ExtractStft(),
                 RandomCrop(constants.STFT_CROP_WIDTH)]))

    net = NeuralNet(Classifier,
                    module__n_classes=constants.NUMBER_OF_CLASSES,
                    criterion=nn.CrossEntropyLoss,
                    batch_size=8,
                    max_epochs=100,
                    optimizer=optim.Adam,
                    lr=0.001,
                    iterator_train__shuffle=True,
                    iterator_train__num_workers=2,
                    iterator_valid__shuffle=False,
                    iterator_valid__num_workers=2,
                    train_split=predefined_split(valid_dataset),
                    device="cuda",
                    callbacks=[
                        Checkpoint(
                            f_params=(out_model / "params.pt").as_posix(),
                            f_optimizer=(out_model / "optim.pt").as_posix(),
                            f_history=(out_model / "history.pt").as_posix()),
                        ProgressBar(postfix_keys=["train_loss", "train_acc"]),
                        EarlyStopping(),
                        EpochScoring(acc,
                                     name="val_acc",
                                     lower_is_better=False,
                                     on_train=False),
                        EpochScoring(acc,
                                     name="train_acc",
                                     lower_is_better=False,
                                     on_train=True),
                        Tensorboard((out_model / "train").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=True),
                        Tensorboard((out_model / "valid").as_posix(),
                                    metrics={"acc": acc_as_metric},
                                    is_training=False),
                    ])

    net.fit(train_dataset)
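predefined_split(valid_dataset) above tells skorch to skip its internal train/validation split and instead evaluate on the supplied dataset after every epoch. The essential pattern, reduced to a sketch with the names from this example:

from skorch import NeuralNet
from skorch.helper import predefined_split

# The validation set is fixed up front; skorch never splits train_dataset.
net = NeuralNet(
    Classifier,
    criterion=nn.CrossEntropyLoss,
    train_split=predefined_split(valid_dataset),
)
net.fit(train_dataset)  # y=None by default: labels come from the dataset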
Example No. 4
from skorch import NeuralNet
from skorch.callbacks import EarlyStopping
from torch.nn import MSELoss
from torch.optim import SGD
import numpy as np

import sys
sys.path.append('..')
from helpers import load_data_in_chunks, save_model
from model import Net
from CustomLoss import CustomLoss

(Xs, Ys) = load_data_in_chunks('train', chunk_size=5)
Xs = Xs.astype(np.float32)
Ys = Ys.astype(np.float32)

regr = NeuralNet(Net,
                 max_epochs=10000000000,  # effectively unbounded; EarlyStopping ends training
                 batch_size=100,
                 iterator_train__shuffle=True,
                 criterion=MSELoss,
                 optimizer=SGD,
                 optimizer__lr=1e-5,
                 optimizer__momentum=0.95,
                 verbose=5,
                 callbacks=[('early_stop', EarlyStopping())])
regr.fit(Xs, Ys / 5000)

save_model(regr, 'lstm-mse')
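save_model here is a project-local helper; if only network weights need to be persisted, skorch's built-in API could be used instead. A sketch, assuming the fitted regr from above:

# Persist and restore parameters with skorch's own methods.
regr.save_params(f_params='lstm-mse-params.pt')

new_regr = NeuralNet(Net, criterion=MSELoss, optimizer=SGD)
new_regr.initialize()  # required before load_params
new_regr.load_params(f_params='lstm-mse-params.pt')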
Example No. 5
    callbacks=[
        cp_best_model,
        cp_best_train,
        progressbar,
        # cyclicLR,
        epoch_MAE_train,
    ])

#######################################################################
# TRAIN MODEL
#######################################################################
print("############################################################")
print("\n\t\tTRAINING MODEL\n")
print("############################################################\n")

model.initialize()
# print("size of inputs fed to optimizer:\t", len(X_train), len(Y_train))
model.fit(X_train, Y_train)

# KFold cross-validation Gridsearch
# from sklearn.model_selection import GridSearchCV
# params = {
#     'optimizer__weight_decay': [0.0005, 0.001, 0.0005, 0.001, 0.005, 0.01],
#     'max_epochs': [40],
#     'lr': [0.05]
# }
# gs = GridSearchCV(model, params, refit=False, scoring = MAE_scorer)
#
# gs.fit(X_train, Y_train)
# print("Best grid search score:\t", gs.best_score_, gs.best_params_)
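For reference, the commented-out search above runs as-is once uncommented, since skorch estimators implement the scikit-learn interface. A sketch assuming model, X_train, Y_train, and MAE_scorer from this script:

from sklearn.model_selection import GridSearchCV

params = {
    'optimizer__weight_decay': [0.0005, 0.001, 0.005, 0.01],
    'max_epochs': [40],
    'lr': [0.05],
}
gs = GridSearchCV(model, params, refit=False, scoring=MAE_scorer, cv=3)
gs.fit(X_train, Y_train)
print("Best grid search score:\t", gs.best_score_, gs.best_params_)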
Example No. 6
def run():
    parser = get_arg_parser()
    cmd_args = parser.parse_args()

    if cmd_args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(cmd_args.gpu)
        gpunum = os.getenv('CUDA_VISIBLE_DEVICES')
        logging.info("GPU has been set to {}".format(gpunum))

    logging.info("Model used for the regression network: {}"
                 .format(cmd_args.model_name))

    # 1. Dataset retrieval
    # --------------------

    tab_printer(constants.Dataset)
    dataset = Dataset(nrows=constants.Dataset.nrows,
                      augment_labels=constants.Dataset.augment_labels,
                      top_n=constants.Dataset.top_n)

    logging.info("Going to create vocabulary and fit a preprocessing pipeline "
                 "using {} samples. Settings will be listed below"
                 .format(len(dataset.X_train)))

    # 2. Preprocessing
    # -----------------

    tab_printer(constants.NLP)
    preprocessor = Preprocessing(dataset.X_train)

    # Preprocess documents
    X_train = preprocessor.transform_documents(dataset.X_train)
    X_test = preprocessor.transform_documents(dataset.X_test)

    # 3. Word embeddings with word2vec
    # --------------------------------

    # Train word2vec embeddings if train_word2vec option is selected
    if cmd_args.train_word2vec: utils.embeddings.main()
    weights = get_embedding_tensor(preprocessor)

    # 4. Node embeddings with AttentionWalk
    # -------------------------------------
    args = _generate_deepwalk_parameters(dataset.y_train_graph)
    if cmd_args.train_attentionwalk: train_attention_walk(args)

    graph_embeddings = pd.read_csv(args.embedding_path).iloc[:, 1:].values

    # Get document representations using node embeddings
    y_embedded = _get_label_embeddings(dataset.y_train, graph_embeddings)
    y_test_embedded = _get_label_embeddings(dataset.y_test, graph_embeddings)

    # 5. Regressor Training
    # ---------------------

    # CUDA_VISIBLE_DEVICES (set above) already restricts which GPU is visible.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    regressor_nn = NeuralNet(
        get_network_class(cmd_args.model_name),
        max_epochs=constants.NeuralNetworkTraining.epochs,
        lr=constants.NeuralNetworkTraining.learning_rate,
        batch_size=constants.NeuralNetworkTraining.batch_size,
        optimizer=torch.optim.Adam,
        criterion=torch.nn.MSELoss,

        module__output_dim=args.dimensions,
        module__embedding=weights,
        module__embedding_dim=constants.NLP.embedding_size,

        device=device,
        train_split=None,
    )

    # Train the regressor neural network
    regressor_nn.fit(X_train, y_embedded.astype(np.float32))

    # 6. Train Multi-label KNN algorithm
    # ----------------------------------

    tab_printer(constants.MLKNN)

    # Train multi-label KNN to turn label embeddings into label predictions
    classifier = MLkNN(k=constants.MLKNN.k, s=constants.MLKNN.s)
    classifier.fit(y_embedded, dataset.y_train)

    # 7. Evaluation
    # -------------

    # Label prediction with documents
    y_test_pred = regressor_nn.predict(X_test)
    preds = classifier.predict(y_test_pred)
    preds_raw = classifier.predict_proba(y_test_pred)

    # Label prediction with label embeddings
    preds_w_labels = classifier.predict(y_test_embedded)
    preds_w_labels_raw = classifier.predict_proba(y_test_embedded)

    # Log evaluation result with label embeddings
    eval_metrics_w_labels = evaluation \
        .all_metrics(preds_w_labels.toarray(),
                     dataset.y_test,
                     yhat_raw=preds_w_labels_raw.toarray())

    logging.info(str(eval_metrics_w_labels))

    # Log evaluation result with documents
    report_evaluation(preds.toarray(),
                      dataset.y_test,
                      yhat_raw=preds_raw.toarray())
Example No. 7
from skorch import NeuralNet
from skorch.callbacks import EarlyStopping
from torch.optim import SGD
import numpy as np

import sys
sys.path.append('..')
from helpers import load_data_in_chunks, save_model
from model import Net
from RelativeEntropyLoss import RelativeEntropyLoss

(Xs, Ys) = load_data_in_chunks('survival', 'train', chunk_size=5)
Xs = Xs.astype(np.float32)
Ys = Ys.astype(np.float32)

regr = NeuralNet(Net,
                 max_epochs=10000000000,  # effectively unbounded; EarlyStopping ends training
                 batch_size=100,
                 iterator_train__shuffle=True,
                 criterion=RelativeEntropyLoss,
                 optimizer=SGD,
                 optimizer__lr=1e-5,
                 optimizer__momentum=0.9,
                 optimizer__nesterov=True,
                 optimizer__dampening=0,
                 verbose=5,
                 callbacks=[('early_stop', EarlyStopping())])

regr.fit(Xs, Ys)

save_model(regr, 'conv-survival')
Example No. 8
def run():
    parser = get_arg_parser(embedding_classifier=False)
    cmd_args = parser.parse_args()

    if cmd_args.gpu is not None:
        os.environ['CUDA_VISIBLE_DEVICES'] = str(cmd_args.gpu)
        gpunum = os.getenv('CUDA_VISIBLE_DEVICES')
        logging.info("GPU has been set to {}".format(gpunum))

    logging.info("Model used for the classification network: {}".format(
        cmd_args.model_name))

    # 1. Dataset retrieval
    # --------------------

    tab_printer(constants.Dataset)
    dataset = Dataset(nrows=constants.Dataset.nrows,
                      augment_labels=constants.Dataset.augment_labels,
                      top_n=constants.Dataset.top_n)

    logging.info("Going to create vocabulary and fit a preprocessing pipeline "
                 "using {} samples. Settings will be listed below".format(
                     len(dataset.X_train)))

    # 2. Preprocessing
    # -----------------

    tab_printer(constants.NLP)
    preprocessor = Preprocessing(dataset.X_train)

    # Preprocess documents
    X_train = preprocessor.transform_documents(dataset.X_train)
    X_test = preprocessor.transform_documents(dataset.X_test)

    # 3. Word embeddings with word2vec
    # --------------------------------

    # Train word2vec embeddings if train_word2vec option
    # is selected
    if cmd_args.train_word2vec: utils.embeddings.main()
    weights = get_embedding_tensor(preprocessor)

    logging.info("Word embeddings are loaded.")

    # 4. Label Network Optim
    # -----------------------

    # CUDA_VISIBLE_DEVICES (set above) already restricts which GPU is visible.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    logging.info("Going to run on device: {}".format(device))

    args = _generate_deepwalk_parameters(dataset.y_train_graph)
    label_embeddings = np.array(
        pd.read_csv(args.embedding_path).iloc[:, 1:].values)
    label_embeddings_weights = torch.FloatTensor(label_embeddings)

    label_network = NeuralNet(
        CAML,
        max_epochs=50,
        lr=constants.NeuralNetworkTraining.learning_rate,
        batch_size=constants.NeuralNetworkTraining.batch_size,
        optimizer=torch.optim.Adam,
        criterion=torch.nn.BCEWithLogitsLoss,
        module__output_dim=dataset.y_train.shape[1],
        module__embedding=label_embeddings_weights,
        module__embedding_dim=args.dimensions,
        module__kernel_size=1,
        device=device,
        train_split=skorch.dataset.CVSplit(stratified=False),
    )

    label_network.fit(dataset.y_train, dataset.y_train.astype(np.float32))

    # 5. Evaluation
    # -------------

    yhat_test_raw_logits = label_network.predict_proba(dataset.y_test)
    yhat_test_raw = torch.sigmoid(torch.Tensor(yhat_test_raw_logits)).numpy()
    yhat_test = np.array(yhat_test_raw >=
                         constants.NeuralNetworkTraining.threshold) \
        .astype(np.int64)

    report_evaluation(yhat_test, dataset.y_test, yhat_raw=yhat_test_raw)
Example No. 9
def baselineNN_search(parameters):
    """Set up, run and evaluate a baseline neural network"""
    # CV with skorch
    net = NeuralNet(
        # Module
        module=BaselineNN,
        # Module settings
        module__hidden_dim=parameters["hidden_units"],
        module__p_dropout=parameters["dropout"],
        module__use_batch_norm=parameters["use_batch_norm"],
        module__weights=FTEMB,  # These are word embeddings
        module__num_classes=len(category_map),
        # Epochs & learning rate
        max_epochs=25,
        lr=parameters["learning_rate"],
        # Optimizer
        optimizer=optim.Adam
        if parameters["optimizer"] == "Adam" else optim.RMSprop,
        # Loss function
        criterion=nn.CrossEntropyLoss,
        criterion__weight=cw,
        # Shuffle training data on each epoch
        iterator_train__shuffle=True,
        # Batch size
        batch_size=128,
        train_split=CVSplit(cv=5),
        # Device
        device=device,
        # Callbacks
        callbacks=[
            skorch.callbacks.EpochScoring(f1_score,
                                          use_caching=True,
                                          name="valid_f1"),
            skorch.callbacks.EpochScoring(precision_score,
                                          use_caching=True,
                                          name="valid_precision"),
            skorch.callbacks.EpochScoring(recall_score,
                                          use_caching=True,
                                          name="valid_recall"),
            skorch.callbacks.EpochScoring(accuracy_score,
                                          use_caching=True,
                                          name="valid_accuracy")
        ])
    # Set verbosity (nonzero prints per-epoch progress)
    net.verbose = 1
    # Fit
    net = net.fit(WD)
    # Get train / validation history
    train_loss = net.history[:, "train_loss"]
    val_loss = net.history[:, "valid_loss"]
    val_accuracy = net.history[:, "valid_accuracy"]
    val_f1 = net.history[:, "valid_f1"]
    val_precision = net.history[:, "valid_precision"]
    val_recall = net.history[:, "valid_recall"]
    # Min loss
    which_min = np.argmin(val_loss)
    # Write to file
    with open(args.out_file, 'a') as of_connection:
        writer = csv.writer(of_connection)
        writer.writerow([
            parameters, which_min,
            np.round(train_loss[which_min], 4),
            np.round(val_accuracy[which_min], 4),
            np.round(val_loss[which_min], 4),
            np.round(val_f1[which_min], 4),
            np.round(val_precision[which_min], 4),
            np.round(val_recall[which_min], 4)
        ])
    # Return cross-validation loss
    return ({
        "loss": val_loss[which_min],
        "parameters": parameters,
        "iteration": which_min,
        'status': STATUS_OK
    })
Example No. 10
                                      use_caching=True,
                                      name="valid_precision"),
        skorch.callbacks.EpochScoring(recall_score,
                                      use_caching=True,
                                      name="valid_recall"),
        skorch.callbacks.EpochScoring(accuracy_score,
                                      use_caching=True,
                                      name="valid_accuracy")
    ])

# Set verbosity (nonzero prints per-epoch progress)
net.verbose = 1

#%% Fit the model

io = net.fit(WD)

# Save model
net.save_params(f_params='models/baselineNN.pkl')

#%% Or load it from disk

net.initialize()
net.load_params(f_params="models/baselineNN.pkl")

#%% Predict on train

# Out
yhat = net.predict(WD)
# Classes
yhatc = yhat.argmax(axis=1)
Example No. 11
lbl17 = OR.getY()
x17 = OR.getAbstract()
x17_t = OR.getTitle()

x = np.concatenate((x18_t, x17_t))
lbl = np.concatenate((lbl18, lbl17))
x_encoded = ohe().fit_transform(x)  # encoded with the entire dataset


class torch_cv(torch.utils.data.Dataset):
    def __init__(self, x_, lbl_, dtype=[torch.LongTensor, torch.LongTensor]):

        x_ = util.padding(x_, 256)
        
        feature = Variable(dtype[0](x_))
        label = Variable(dtype[1](lbl_))
        
        self.X = feature
        self.Y = label

    def __getitem__(self, idx):
        return self.X[idx], self.Y[idx]

    def __len__(self):
        return len(self.X)

train_ds = torch_cv(x_encoded, lbl)
mlp = models.MLP(100, 2)  # named `mlp` so it does not shadow torch.nn
net = NeuralNet(mlp, criterion=mlp.loss_function())
net.fit(x_encoded, lbl)
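Note that train_ds is constructed above but never passed to the network; skorch can also consume a torch Dataset directly, with y=None because __getitem__ already returns (feature, label) pairs. A sketch:

net.fit(train_ds, y=None)  # labels are bundled inside the dataset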
Example No. 12
                    module__output_dim=dataset.output_dim,
                    criterion=nn.BCEWithLogitsLoss,
                    iterator_train__collate_fn=dataset.data_collator,
                    iterator_valid__collate_fn=dataset.data_collator,
                    callbacks=[
                        EpochScoring(scoring=metrics.make_scorer(error_rate),
                                     lower_is_better=True),
                        EpochScoring(scoring=metrics.make_scorer(accuracy),
                                     lower_is_better=False),
                        EarlyStopping(monitor="valid_loss", patience=5),
                        LRScheduler(policy="ReduceLROnPlateau", patience=3)
                    ],
                    device=options.device,
                    **config)

    net.fit(dataset)
    logits = net.forward(dataset)
    probs = torch.sigmoid(logits)
    preds = (probs > .5).long().numpy()
    labels = np.stack(dataset.labels)
    correct = (preds == labels)
    positive = (labels == 1)
    pred_pos = (preds == 1)
    TP = correct & positive
    FP = (~correct) & (~positive)
    TN = correct & (~positive)
    FN = (~correct) & positive
    precision = TP.sum() / pred_pos.sum()
    recall = TP.sum() / positive.sum()
    f1_score = 2 * precision * recall / (precision + recall)
    acc = correct.mean()
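The hand-rolled counts above can be cross-checked against scikit-learn; a sketch using the same labels and preds arrays (flattened, since the task is multi-label binary):

from sklearn.metrics import accuracy_score, precision_recall_fscore_support

p, r, f1, _ = precision_recall_fscore_support(
    labels.ravel(), preds.ravel(), average='binary')
acc = accuracy_score(labels.ravel(), preds.ravel())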