Exemplo n.º 1
0
def test_initializers_with_pattern():
    """Pattern-based initializers must leave the word embeddings untouched."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, pred_dim=1
    )
    cmodel = c(model)

    # snapshot the word-embedding weights before compiling...
    before = [p for n, p in cmodel.named_parameters() if "word_embed" in n]

    model.compile(method="binary", verbose=0, initializers=initializers_2)

    # ...and collect them again after the initializers ran
    after = [p for n, p in model.named_parameters() if "word_embed" in n]

    assert torch.all(before[0] == after[0].cpu())
def test_save_and_load_dict():
    """Persist a trained model's state dict and verify the weights round-trip."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    tabmlp = TabMlp(
        mlp_hidden_dims=[32, 16],
        column_idx={name: pos for pos, name in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    # train on copies so the originals stay untouched for the reload model
    model1 = WideDeep(wide=deepcopy(wide), deeptabular=deepcopy(tabmlp))
    trainer1 = Trainer(model1, objective="binary", verbose=0)
    trainer1.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    saved_weights = model1.wide.wide_linear.weight.data

    path = "tests/test_model_functioning/model_dir/model_d.t"
    trainer1.save_model_state_dict(path)

    model2 = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer2 = Trainer(model2, objective="binary", verbose=0)
    trainer2.load_model_state_dict(path)
    loaded_weights = trainer2.model.wide.wide_linear.weight.data

    shutil.rmtree("tests/test_model_functioning/model_dir/")

    assert torch.allclose(saved_weights, loaded_weights)
def test_no_deephead_and_head_layers():
    """head_layers alone should build a default deephead with the given sizes."""
    model = WideDeep(wide=wide, deepdense=deepdense,
                     head_layers=[8, 4])  # noqa: F841
    # expected (out_features, in_features) for the auto-built head layers
    expected = {
        "deephead.head_layer_0.0.weight": (8, 8),
        "deephead.head_layer_1.0.weight": (4, 8),
    }
    checks = [
        (p.size(0), p.size(1)) == expected[n]
        for n, p in model.named_parameters()
        if n in expected
    ]
    assert all(checks)
def test_optimizer_scheduler_format():
    """A dict of optimizers with a bare (non-dict) scheduler must raise ValueError."""
    model = WideDeep(deepdense=deepdense)
    opt = torch.optim.Adam(model.deepdense.parameters(), lr=0.01)
    optimizers = {"deepdense": opt}
    # the scheduler is not wrapped in a matching dict -> invalid format
    schedulers = torch.optim.lr_scheduler.StepLR(opt, step_size=3)
    with pytest.raises(ValueError):
        model.compile(
            method="binary", optimizers=optimizers, lr_schedulers=schedulers
        )
def test_history_callback(deepcomponent, component_name):
    """Removing ``output_dim`` from a deep component must raise AttributeError."""
    if deepcomponent is None:
        deepcomponent = deepcopy(deepdense)
    # strip the attribute WideDeep relies on when wiring the components
    deepcomponent.__dict__.pop("output_dim")
    with pytest.raises(AttributeError):
        if component_name == "dense":
            WideDeep(wide, deepdense=deepcomponent)
        elif component_name == "text":
            WideDeep(wide, deepdense=deepdense, deeptext=deepcomponent)
        elif component_name == "image":
            WideDeep(wide, deepdense=deepdense, deepimage=deepcomponent)
Exemplo n.º 6
0
def test_fit_methods(
    X_wide,
    X_deep,
    target,
    method,
    X_wide_test,
    X_deep_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit with each objective and check prediction/probability shapes.

    Bug fix: the original ``assert preds.shape[0] == 100, probs.shape[1] ==
    probs_dim`` used the comma as the assert *message*, so the probability
    condition was never evaluated — and ``probs`` is unbound for the binary
    case, so a failing assert would raise NameError instead.  Both
    conditions are now asserted explicitly.
    """
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense, pred_dim=pred_dim)
    model.compile(method=method, verbose=0)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target)
    preds = model.predict(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    assert preds.shape[0] == 100
    if method != "binary":
        # predict_proba is only exercised for non-binary objectives
        probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
        assert probs.shape[1] == probs_dim
def test_filepath_error():
    """A ModelCheckpoint filepath without a directory must raise ValueError."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deepdense = DeepDense(
        hidden_layers=[16, 4],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    with pytest.raises(ValueError):
        model.compile(
            method="binary",
            verbose=0,
            callbacks=[ModelCheckpoint(filepath="wrong_file_path")],
        )
Exemplo n.º 8
0
def test_initializers_with_pattern():
    """Pattern-scoped initializers should not modify the word embeddings."""
    wide = Wide(100, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, pred_dim=1
    )
    cmodel = c(model)

    # word-embedding weights before the Trainer applies the initializers
    before = [p for n, p in cmodel.named_parameters() if "word_embed" in n]

    trainer = Trainer(
        model, objective="binary", verbose=0, initializers=initializers_2
    )

    # ...and the same weights afterwards
    after = [p for n, p in trainer.model.named_parameters() if "word_embed" in n]

    assert torch.all(before[0] == after[0].cpu())
Exemplo n.º 9
0
def test_model_checkpoint(save_best_only, max_save, n_files):
    """ModelCheckpoint must never write more than ``n_files`` checkpoints."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    checkpoint = ModelCheckpoint(
        "tests/test_model_functioning/weights/test_weights",
        save_best_only=save_best_only,
        max_save=max_save,
    )
    trainer = Trainer(
        model=WideDeep(wide=wide, deeptabular=deeptabular),
        objective="binary",
        callbacks=[checkpoint],
        verbose=0,
    )
    trainer.fit(
        X_wide=X_wide, X_tab=X_tab, target=target, n_epochs=5, val_split=0.2
    )
    n_saved = len(os.listdir("tests/test_model_functioning/weights/"))

    # remove the checkpoint directory before asserting so it never lingers
    shutil.rmtree("tests/test_model_functioning/weights/")

    assert n_saved <= n_files
Exemplo n.º 10
0
def test_fit_objectives_tab_transformer(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit a TabTransformer model per objective and validate output shapes.

    Bug fix: the trailing comma in the original ``assert`` turned the
    probability-shape comparison into the assert *message*, so it was never
    evaluated — and ``probs`` is unbound for the binary branch, so a failing
    assert would raise NameError.  Both conditions are now checked properly.
    """
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    tab_transformer = TabTransformer(
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input_tt,
        continuous_cols=colnames[5:],
    )
    model = WideDeep(wide=wide, deeptabular=tab_transformer, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    assert preds.shape[0] == 32
    if objective != "binary":
        # probabilities are only produced for non-binary objectives
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert probs.shape[1] == probs_dim
Exemplo n.º 11
0
def test_basic_run_with_metrics_multiclass():
    """A multiclass run with the Accuracy metric must log loss and accuracy."""
    wide = Wide(np.unique(X_wide).shape[0], 3)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx={name: pos for pos, name in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=3)
    trainer = Trainer(
        model, objective="multiclass", metrics=[Accuracy], verbose=False
    )
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target_multi,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    logged = trainer.history.keys()
    assert "train_loss" in logged
    assert "train_acc" in logged
def set_model(args, prepare_tab):
    """Build a WideDeep model with a TabResnet deeptabular part from CLI args."""
    if args.blocks_dims == "same":
        # three residual blocks, all sized to the total embedding width
        inp_dim = sum(e[2] for e in prepare_tab.embeddings_input)
        blocks_dims = [inp_dim] * 3
    else:
        # NOTE(review): eval on a CLI-supplied string is unsafe for untrusted
        # input; consider ast.literal_eval if the value is always a list literal.
        blocks_dims = eval(args.blocks_dims)

    if args.mlp_hidden_dims == "auto":
        last_dim = blocks_dims[-1]
        mlp_hidden_dims = [4 * last_dim, 2 * last_dim]
    else:
        mlp_hidden_dims = eval(args.mlp_hidden_dims)  # NOTE(review): see above

    deeptabular = TabResnet(
        embed_input=prepare_tab.embeddings_input,
        column_idx=prepare_tab.column_idx,
        blocks_dims=blocks_dims,
        blocks_dropout=args.blocks_dropout,
        mlp_hidden_dims=mlp_hidden_dims,
        mlp_activation=args.mlp_activation,
        mlp_dropout=args.mlp_dropout,
        mlp_batchnorm=args.mlp_batchnorm,
        mlp_batchnorm_last=args.mlp_batchnorm_last,
        mlp_linear_first=args.mlp_linear_first,
        embed_dropout=args.embed_dropout,
    )
    return WideDeep(deeptabular=deeptabular)
Exemplo n.º 13
0
def test_widedeep_inputs(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
    val_split,
    transforms,
):
    """Fit with every supported input combination; a train loss must be logged."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", transforms=transforms, verbose=0)
    fit_kwargs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        val_split=val_split,
        batch_size=16,
    )
    trainer.fit(**fit_kwargs)
    assert trainer.history["train_loss"] is not None
Exemplo n.º 14
0
def test_fit_objectives(
    X_wide,
    X_tab,
    target,
    objective,
    X_wide_test,
    X_tab_test,
    X_test,
    pred_dim,
    probs_dim,
):
    """Fit with each objective and validate prediction/probability shapes.

    Bug fix: the original ``assert preds.shape[0] == 32, probs.shape[1] ==
    probs_dim`` used the comma as the assert *message*, so the probability
    condition was never checked — and ``probs`` is unbound for the binary
    case, making a failing assert raise NameError.  Both conditions are now
    asserted explicitly.
    """
    wide = Wide(np.unique(X_wide).shape[0], pred_dim)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular, pred_dim=pred_dim)
    trainer = Trainer(model, objective=objective, verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)
    preds = trainer.predict(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
    assert preds.shape[0] == 32
    if objective != "binary":
        # probabilities are only produced for non-binary objectives
        probs = trainer.predict_proba(X_wide=X_wide, X_tab=X_tab, X_test=X_test)
        assert probs.shape[1] == probs_dim
Exemplo n.º 15
0
def test_xtrain_xval_assertion(
    X_wide,
    X_tab,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
):
    """Mixing per-component arrays with X_train/X_val must raise AssertionError."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    fit_kwargs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        batch_size=16,
    )
    with pytest.raises(AssertionError):
        trainer.fit(**fit_kwargs)
Exemplo n.º 16
0
def test_deephead_and_head_layers():
    """Supplying head_layers together with a custom deephead warns the user."""
    deephead = nn.Sequential(nn.Linear(32, 16), nn.Linear(16, 8))
    with pytest.warns(UserWarning):
        WideDeep(  # noqa: F841
            wide=wide,
            deepdense=deepdense,
            head_layers=[16, 8],
            deephead=deephead,
        )
Exemplo n.º 17
0
def test_deephead_and_head_layers():
    """Supplying head_layers together with a custom deephead raises ValueError."""
    deephead = nn.Sequential(nn.Linear(32, 16), nn.Linear(16, 8))
    with pytest.raises(ValueError):
        WideDeep(  # noqa: F841
            wide=wide,
            deepdense=deepdense,
            head_layers=[16, 8],
            deephead=deephead,
        )
Exemplo n.º 18
0
def test_save_and_load():
    """Save a full trained model and reload it; wide weights must round-trip."""
    model = WideDeep(wide=wide, deeptabular=tabmlp)
    trainer = Trainer(model, objective="binary", verbose=0)
    trainer.fit(X_wide=X_wide, X_tab=X_tab, target=target, batch_size=16)

    path = "tests/test_model_functioning/model_dir/model.t"
    trainer.save_model(path)
    reloaded = Trainer.load_model(path)

    assert torch.allclose(
        model.wide.wide_linear.weight.data,
        reloaded.wide.wide_linear.weight.data,
    )
def test_initializers_1():
    """Every layer listed in ``test_layers_1`` must get new weights."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(
        wide=wide,
        deepdense=deepdense,
        deeptext=deeptext,
        deepimage=deepimage,
        output_dim=1,
    )
    cmodel = c(model)

    # weights of the tracked layers before compiling...
    org_weights = [p for n, p in cmodel.named_parameters() if n in test_layers_1]

    model.compile(method='binary', verbose=0, initializers=initializers_1)

    # ...and after the initializers have been applied
    init_weights = [p for n, p in model.named_parameters() if n in test_layers_1]

    # for each tracked layer, no element may equal its pre-init value
    assert all(
        torch.all((1 - (a == b).int()).bool())
        for a, b in zip(org_weights, init_weights)
    )
def test_xtrain_xval_assertion(
    X_wide,
    X_deep,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
):
    """Passing both per-component arrays and X_train/X_val must fail."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", verbose=0)
    fit_kwargs = dict(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        batch_size=16,
    )
    with pytest.raises(AssertionError):
        model.fit(**fit_kwargs)
Exemplo n.º 21
0
def test_early_stop():
    """With patience=3, the recorded history spans patience + 1 epochs."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    early_stopping = EarlyStopping(
        min_delta=0.1, patience=3, restore_best_weights=True, verbose=1
    )
    model.compile(method="binary", callbacks=[early_stopping], verbose=1)
    model.fit(
        X_wide=X_wide, X_deep=X_deep, target=target, val_split=0.2, n_epochs=5
    )
    # the callback halts training after `patience` unimproved epochs,
    # so the history length equals patience + 1
    assert len(model.history._history["train_loss"]) == 3 + 1
Exemplo n.º 22
0
def test_widedeep_inputs(
    X_wide,
    X_deep,
    X_text,
    X_img,
    X_train,
    X_val,
    target,
    val_split,
    transforms,
    nepoch,
    null,
):
    """Fit with every input combination and check the recorded history."""
    model = WideDeep(
        wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage
    )
    model.compile(method="binary", transforms=transforms, verbose=0)
    fit_kwargs = dict(
        X_wide=X_wide,
        X_deep=X_deep,
        X_text=X_text,
        X_img=X_img,
        X_train=X_train,
        X_val=X_val,
        target=target,
        val_split=val_split,
        batch_size=16,
    )
    model.fit(**fit_kwargs)
    assert model.history.epoch[0] == nepoch
    assert model.history._history["train_loss"] is not null
Exemplo n.º 23
0
def test_model_checkpoint(save_best_only, max_save, n_files):
    """No more than ``n_files`` checkpoint files may be written to disk."""
    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense)
    checkpoint = ModelCheckpoint(
        "weights/test_weights", save_best_only=save_best_only, max_save=max_save
    )
    model.compile(method="binary", callbacks=[checkpoint], verbose=0)
    model.fit(
        X_wide=X_wide, X_deep=X_deep, target=target, n_epochs=5, val_split=0.2
    )
    saved = os.listdir("weights/")
    n_saved = len(saved)
    # remove the files but keep the directory, as the original test did
    for fname in saved:
        os.remove("weights/" + fname)
    assert n_saved <= n_files
Exemplo n.º 24
0
def test_focal_loss(X_wide, X_deep, target, method, output_dim, probs_dim):
    """Train with focal loss and check the probability matrix width.

    Fix: dropped the stray, never-used ``model3 =`` alias from the original
    chained assignment (``deepdense = model3 = DeepDense(...)``).
    """
    wide = Wide(100, output_dim)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deepdense=deepdense, output_dim=output_dim)
    model.compile(method=method, verbose=0, with_focal_loss=True)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target)
    probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep)
    assert probs.shape[1] == probs_dim
Exemplo n.º 25
0
def test_optimizer_scheduler_format():
    """A bare scheduler paired with a dict of optimizers must raise ValueError."""
    model = WideDeep(deeptabular=tabmlp)
    opt = torch.optim.Adam(model.deeptabular.parameters(), lr=0.01)
    optimizers = {"deeptabular": opt}
    # the scheduler is not wrapped in a dict keyed like ``optimizers``
    schedulers = torch.optim.lr_scheduler.StepLR(opt, step_size=3)
    with pytest.raises(ValueError):
        Trainer(  # noqa: F841
            model,
            objective="binary",
            optimizers=optimizers,
            lr_schedulers=schedulers,
        )
Exemplo n.º 26
0
def test_basic_run_with_metrics_binary(wide, deeptabular):
    """A binary run with the Accuracy metric must log loss and accuracy."""
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    trainer = Trainer(model, objective="binary", metrics=[Accuracy], verbose=False)
    trainer.fit(
        X_wide=X_wide,
        X_tab=X_tab,
        target=target,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    logged = trainer.history.keys()
    assert "train_loss" in logged
    assert "train_acc" in logged
Exemplo n.º 27
0
def test_filepath_error():
    """A checkpoint filepath with no directory component must raise ValueError."""
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[16, 4],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    model = WideDeep(wide=wide, deeptabular=deeptabular)
    with pytest.raises(ValueError):
        Trainer(  # noqa: F841
            model=model,
            objective="binary",
            verbose=0,
            callbacks=[ModelCheckpoint(filepath="wrong_file_path")],
        )
Exemplo n.º 28
0
def test_individual_inputs(wide, deeptabular, deeptext, deepimage, X_wide,
                           X_tab, X_text, X_img, target):
    """Fit with each data mode supplied individually; one epoch must be logged."""
    model = WideDeep(
        wide=wide, deeptabular=deeptabular, deeptext=deeptext, deepimage=deepimage
    )
    trainer = Trainer(model, objective="binary", verbose=0)
    fit_kwargs = dict(
        X_wide=X_wide,
        X_tab=X_tab,
        X_text=X_text,
        X_img=X_img,
        target=target,
        batch_size=16,
    )
    trainer.fit(**fit_kwargs)
    # a single history entry means the run completed successfully
    assert len(trainer.history) == 1
def test_basic_run_with_metrics_binary():
    """Binary fit with Accuracy must record both loss and accuracy in history."""
    model = WideDeep(wide=wide, deepdense=deepdense)
    model.compile(method="binary", metrics=[Accuracy], verbose=False)
    model.fit(
        X_wide=X_wide,
        X_deep=X_deep,
        target=target,
        n_epochs=1,
        batch_size=16,
        val_split=0.2,
    )
    logged = model.history._history.keys()
    assert "train_loss" in logged
    assert "train_acc" in logged
def test_fit_with_deephead():
    """Fit a model with a custom deephead and check prediction shapes.

    Bug fix: the original final line ``assert preds.shape[0] == 32,
    probs.shape[1] == 2`` used the comma as the assert *message*, so the
    probability-shape condition was never actually evaluated.  Both
    conditions are now asserted separately.
    """
    wide = Wide(np.unique(X_wide).shape[0], 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deephead = nn.Sequential(nn.Linear(16, 8), nn.Linear(8, 4))
    model = WideDeep(
        wide=wide, deepdense=deepdense, pred_dim=1, deephead=deephead
    )
    model.compile(method="binary", verbose=0)
    model.fit(X_wide=X_wide, X_deep=X_deep, target=target_binary, batch_size=16)
    preds = model.predict(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    probs = model.predict_proba(X_wide=X_wide, X_deep=X_deep, X_test=X_test)
    assert preds.shape[0] == 32
    assert probs.shape[1] == 2