def test_history_callback(optimizers, schedulers, len_loss_output,
                          len_lr_output, init_lr, schedulers_type):
    model.compile(
        method="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        callbacks=[LRHistory(n_epochs=5)],
        verbose=0,
    )
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        target=target,
        n_epochs=5,
        batch_size=16,
    )
    out = []
    out.append(len(model.history._history["train_loss"]) == len_loss_output)
    # The recorded lrs may be nested (e.g. one sub-list per epoch), in which
    # case they are flattened; a TypeError means the entries are already flat
    # floats, and any other error (e.g. a missing key) falls back to the
    # generic "lr_0" entry.
    try:
        lr_list = list(chain.from_iterable(model.lr_history["lr_deepdense_0"]))
    except TypeError:
        lr_list = model.lr_history["lr_deepdense_0"]
    except Exception:
        lr_list = model.lr_history["lr_0"]
    out.append(len(lr_list) == len_lr_output)
    if init_lr is not None and schedulers_type == "step":
        out.append(lr_list[-1] == init_lr / 10)
    elif init_lr is not None and schedulers_type == "cyclic":
        out.append(lr_list[-1] == init_lr)
    assert all(out)
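Why the "step" branch expects init_lr / 10: a StepLR with its default gamma=0.1 divides the lr by 10 every step_size epochs, so over 5 epochs with, say, step_size=3 exactly one decay happens. A minimal, self-contained sketch (the optimizer, lr and step_size are illustrative assumptions, not the test fixtures):

import torch

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.1)
sch = torch.optim.lr_scheduler.StepLR(opt, step_size=3)  # gamma defaults to 0.1

lrs = []
for epoch in range(5):
    lrs.append(opt.param_groups[0]["lr"])  # record the lr at the start of each epoch
    opt.step()
    sch.step()
print(lrs)  # [0.1, 0.1, 0.1, 0.01, 0.01] (up to float rounding): lrs[-1] == 0.1 / 10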
Example #2
def test_history_callback_w_tabtransformer(
    optimizers, schedulers, len_loss_output, len_lr_output, init_lr, schedulers_type
):
    trainer_tt = Trainer(
        model_tt,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        callbacks=[LRHistory(n_epochs=5)],
        verbose=0,
    )
    trainer_tt.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=5,
        batch_size=16,
    )
    out = []
    out.append(len(trainer_tt.history["train_loss"]) == len_loss_output)
    # Same flatten-or-fallback logic as above, here for the "deeptabular"
    # parameter group.
    try:
        lr_list = list(chain.from_iterable(trainer_tt.lr_history["lr_deeptabular_0"]))
    except TypeError:
        lr_list = trainer_tt.lr_history["lr_deeptabular_0"]
    except Exception:
        lr_list = trainer_tt.lr_history["lr_0"]
    out.append(len(lr_list) == len_lr_output)
    if init_lr is not None and schedulers_type == "step":
        out.append(lr_list[-1] == init_lr / 10)
    elif init_lr is not None and schedulers_type == "cyclic":
        out.append(lr_list[-1] == init_lr)
    assert all(out)
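Likewise, the "cyclic" branch expects the last recorded lr to equal init_lr because a CyclicLR that completes whole cycles ends back at base_lr. A minimal sketch (all values are illustrative assumptions):

import torch

param = torch.nn.Parameter(torch.zeros(1))
opt = torch.optim.SGD([param], lr=0.001)
sch = torch.optim.lr_scheduler.CyclicLR(
    opt, base_lr=0.001, max_lr=0.01, step_size_up=5, step_size_down=5,
    cycle_momentum=False,
)

for _ in range(10):  # one full cycle, stepping per batch
    opt.step()
    sch.step()
print(opt.param_groups[0]["lr"])  # back at base_lr == 0.001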
Example #3
def test_history_callback(optimizers, schedulers, len_loss_output, len_lr_output):
    model.compile(
        method="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        callbacks=[LRHistory(n_epochs=5)],
        verbose=0,
    )
    model.fit(X_wide=X_wide, X_deep=X_deep, X_text=X_text, target=target, n_epochs=5)
    out = []
    out.append(len(model.history._history["train_loss"]) == len_loss_output)
    try:
        lr_list = list(chain.from_iterable(model.lr_history["lr_deepdense_0"]))
    except TypeError:
        lr_list = model.lr_history["lr_deepdense_0"]
    out.append(len(lr_list) == len_lr_output)
    assert all(out)
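Example #4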
steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

trainer = Trainer(
    model,
    objective="regression",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],  # e.g. "val_loss" -> "loss"
    callbacks=[early_stopping, LRHistory(n_epochs=args.n_epochs)],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
    X_val={"X_tab": X_valid, "target": y_valid},
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start

if args.save_results:
    suffix = str(datetime.now()).replace(" ", "_").split(".")[:-1][0]
    filename = "_".join(["fb_comments_tabresnet", suffix]) + ".p"
Example #5
steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

trainer = Trainer(
    model,
    objective="regression",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping,
               LRHistory(n_epochs=args.n_epochs)],
)

start = time()
trainer.fit(
    X_train={
        "X_tab": X_train,
        "target": y_train
    },
    X_val={
        "X_tab": X_valid,
        "target": y_valid
    },
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
Example #6
        deep_column_idx=prepare_deep.deep_column_idx,
        embed_input=prepare_deep.embeddings_input,
        continuous_cols=continuous_cols,
    )
    model = WideDeep(wide=wide, deepdense=deepdense)

    wide_opt = torch.optim.Adam(model.wide.parameters())
    deep_opt = RAdam(model.deepdense.parameters())
    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

    optimizers = {"wide": wide_opt, "deepdense": deep_opt}
    schedulers = {"wide": wide_sch, "deepdense": deep_sch}
    initializers = {"wide": KaimingNormal, "deepdense": XavierNormal}
    callbacks = [
        LRHistory(n_epochs=10),
        EarlyStopping,
        ModelCheckpoint(filepath="model_weights/wd_out"),
    ]
    metrics = [BinaryAccuracy]

    model.compile(
        method="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
        initializers=initializers,
        callbacks=callbacks,
        metrics=metrics,
    )

    model.fit(
Example #7
early_stopping = EarlyStopping(
    monitor=args.monitor,
    min_delta=args.early_stop_delta,
    patience=args.early_stop_patience,
)

model_checkpoint = ModelCheckpoint(
    filepath=str(MODELS_DIR / "best_model"),
    monitor=args.monitor,
    save_best_only=True,
    max_save=1,
)

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    reducelronplateau_criterion=args.monitor.split("_")[-1],
    callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
    metrics=[Accuracy],
)

start = time()
trainer.fit(
    X_train={"X_tab": X_train, "target": y_train},
    X_val={"X_tab": X_test, "target": y_test},
    n_epochs=args.n_epochs,
    batch_size=args.batch_size,
    validation_freq=args.eval_every,
)
runtime = time() - start

y_pred = trainer.predict(X_tab=X_test)
acc = accuracy_score(y_test, y_pred)
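Example #8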
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
):

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="regression",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[
            early_stopping, model_checkpoint,
            LRHistory(n_epochs=args.n_epochs)
        ],
    )

    start = time()
    trainer.fit(
        X_train={
            "X_tab": X_train,
            "target": y_train
        },
        X_val={
            "X_tab": X_test,
            "target": y_test
        },
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    print(f"rmse with the best model: {rmse}")

    if args.save_results:
        suffix = str(datetime.now()).replace(" ", "_").split(".")[:-1][0]
        filename = "_".join(["fb_comments", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["rmse"] = rmse
        results_d["r2"] = r2
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
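Example #9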
def run_experiment_and_save(
    model,
    model_name,
    results_dir,
    models_dir,
    args,
    X_train,
    X_test,
    y_train,
    y_test,
    fl_exp_indx: int = 0,
):

    try:
        if args.focal_loss:
            alpha, gamma = load_focal_loss_params(results_dir, fl_exp_indx)
            focal_loss = True
        else:
            alpha = 0.25
            gamma = 2
            focal_loss = False
    except AttributeError:
        alpha = 0.25
        gamma = 2
        focal_loss = False

    optimizers = set_optimizer(model, args)

    steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
    lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)

    early_stopping = EarlyStopping(
        monitor=args.monitor,
        min_delta=args.early_stop_delta,
        patience=args.early_stop_patience,
    )

    model_checkpoint = ModelCheckpoint(
        filepath=str(models_dir / "best_model"),
        monitor=args.monitor,
        save_best_only=True,
        max_save=1,
    )

    trainer = Trainer(
        model,
        objective="binary_focal_loss" if focal_loss else "binary",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
        reducelronplateau_criterion=args.monitor.split("_")[-1],
        callbacks=[early_stopping, model_checkpoint, LRHistory(n_epochs=args.n_epochs)],
        metrics=[Accuracy, F1Score],
        alpha=alpha,
        gamma=gamma,
    )

    start = time()
    trainer.fit(
        X_train={"X_tab": X_train, "target": y_train},
        X_val={"X_tab": X_test, "target": y_test},
        n_epochs=args.n_epochs,
        batch_size=args.batch_size,
        validation_freq=args.eval_every,
    )
    runtime = time() - start

    y_pred = trainer.predict(X_tab=X_test)

    acc = accuracy_score(y_test, y_pred)
    auc = roc_auc_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    print(f"Accuracy: {acc}. F1: {f1}. ROC_AUC: {auc}")
    print(confusion_matrix(y_test, y_pred))

    if args.save_results:
        suffix = str(datetime.now()).replace(" ", "_").split(".")[:-1][0]
        filename = "_".join(["bankm", model_name, "best", suffix]) + ".p"
        results_d = {}
        results_d["args"] = args
        results_d["acc"] = acc
        results_d["auc"] = auc
        results_d["f1"] = f1
        results_d["early_stopping"] = early_stopping
        results_d["trainer_history"] = trainer.history
        results_d["trainer_lr_history"] = trainer.lr_history
        results_d["runtime"] = runtime
        with open(results_dir / filename, "wb") as f:
            pickle.dump(results_d, f)
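For reference, the "binary_focal_loss" objective selected above is the focal loss of Lin et al. (2017), FL(p_t) = -alpha * (1 - p_t)**gamma * log(p_t); the fallback values alpha=0.25 and gamma=2 used in this snippet are the defaults recommended in that paper.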
Example #10
        def objective(params):

            deeptabular = TabMlp(
                column_idx=prepare_tab.column_idx,
                mlp_hidden_dims=mlp_hidden_dims,
                mlp_activation=args.mlp_activation,
                mlp_dropout=args.mlp_dropout,
                mlp_batchnorm=args.mlp_batchnorm,
                mlp_batchnorm_last=args.mlp_batchnorm_last,
                mlp_linear_first=args.mlp_linear_first,
                embed_input=prepare_tab.embeddings_input,
                embed_dropout=args.embed_dropout,
            )
            model = WideDeep(deeptabular=deeptabular)

            optimizer = torch.optim.Adam(model.parameters(),
                                         lr=args.lr,
                                         weight_decay=args.weight_decay)

            lr_scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer,
                mode=args.rop_mode,
                factor=args.rop_factor,
                patience=args.rop_patience,
                threshold=args.rop_threshold,
                threshold_mode=args.rop_threshold_mode,
            )

            early_stopping = EarlyStopping(
                monitor=args.monitor,
                min_delta=args.early_stop_delta,
                patience=args.early_stop_patience,
            )
            trainer = Trainer(
                model,
                objective="binary_focal_loss",
                optimizers=optimizer,
                lr_schedulers=lr_scheduler,
                reducelronplateau_criterion=args.monitor.split("_")[-1],
                callbacks=[early_stopping,
                           LRHistory(n_epochs=args.n_epochs)],
                metrics=[Accuracy, F1Score],
                alpha=params["alpha"],
                gamma=params["gamma"],
                verbose=0,
            )

            trainer.fit(
                X_train={
                    "X_tab": X_train,
                    "target": y_train
                },
                X_val={
                    "X_tab": X_valid,
                    "target": y_valid
                },
                n_epochs=args.n_epochs,
                batch_size=args.batch_size,
                validation_freq=args.eval_every,
            )

            score = early_stopping.best

            return score
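The objective above returns early_stopping.best so it can drive a hyperparameter search over the focal-loss parameters. A hypothetical driver using hyperopt (the search space and max_evals are assumptions; since objective is defined in an enclosing scope above, this is a sketch rather than a standalone script, and it presumes the monitored quantity is a loss to be minimised):

from hyperopt import fmin, hp, tpe

space = {
    "alpha": hp.uniform("alpha", 0.1, 0.9),
    "gamma": hp.uniform("gamma", 0.5, 5.0),
}
best = fmin(fn=objective, space=space, algo=tpe.suggest, max_evals=50)
print(best)  # best values found for "alpha" and "gamma"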