Example #1
def test_initializers_with_pattern():

    wide = Wide(100, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide,
                     deeptabular=deeptabular,
                     deeptext=deeptext,
                     pred_dim=1)
    cmodel = c(model)
    org_word_embed = []
    for n, p in cmodel.named_parameters():
        if "word_embed" in n:
            org_word_embed.append(p)
    trainer = Trainer(model,
                      objective="binary",
                      verbose=0,
                      initializers=initializers_2)
    init_word_embed = []
    for n, p in trainer.model.named_parameters():
        if "word_embed" in n:
            init_word_embed.append(p)

    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
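
The `initializers_2` fixture used here is defined elsewhere in the test module and is not part of this excerpt. A minimal sketch of what a pattern-based setup could look like, assuming the initializer classes in `pytorch_widedeep.initializers` and their `pattern` regex argument (the dictionary below is illustrative, not the actual fixture):

from pytorch_widedeep.initializers import KaimingNormal, XavierNormal

# Hypothetical fixture: initialize every component, but skip any parameter
# whose name contains "word_embed"; this is why the assertion above expects
# the original and post-initialization embedding weights to be identical.
initializers_2 = {
    "wide": XavierNormal,
    "deeptabular": KaimingNormal,
    "deeptext": KaimingNormal(pattern=r"^(?!.*word_embed)"),
}
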
Example #2
def test_initializers_with_pattern():

    wide = Wide(100, 1)
    deepdense = DeepDense(
        hidden_layers=[32, 16],
        dropout=[0.5, 0.5],
        deep_column_idx=deep_column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide,
                     deepdense=deepdense,
                     deeptext=deeptext,
                     pred_dim=1)
    cmodel = c(model)
    org_word_embed = []
    for n, p in cmodel.named_parameters():
        if "word_embed" in n:
            org_word_embed.append(p)
    model.compile(method="binary", verbose=0, initializers=initializers_2)
    init_word_embed = []
    for n, p in model.named_parameters():
        if "word_embed" in n:
            init_word_embed.append(p)

    assert torch.all(org_word_embed[0] == init_word_embed[0].cpu())
Example #3
def test_catch_warning():
	with pytest.warns(UserWarning):
		model3 = DeepText(
		    vocab_size=vocab_size,
		    embed_dim=32,
		    embedding_matrix=pretrained_embeddings,
		    padding_idx=0
		    )
Example #4
def test_catch_warning():
    with pytest.warns(UserWarning):
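        # the warning presumably fires because embed_dim=32 does not match the
        # 64-column pretrained matrix (assumed reason; the check itself is not
        # shown in this excerpt)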
        model3 = DeepText(
            vocab_size=vocab_size,
            embed_dim=32,
            embed_matrix=pretrained_embeddings,
            padding_idx=0,
        )
    out = model3(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64
Example #5
def test_initializers_1():

	wide = Wide(100, 1)
	deepdense = DeepDense(hidden_layers=[32,16], dropout=[0.5, 0.5], deep_column_idx=deep_column_idx,
	    embed_input=embed_input, continuous_cols=colnames[-5:])
	deeptext = DeepText( vocab_size=vocab_size, embed_dim=32, padding_idx=0)
	deepimage=DeepImage(pretrained=True)
	model = WideDeep(wide=wide, deepdense=deepdense, deeptext=deeptext, deepimage=deepimage, output_dim=1)
	cmodel = c(model)

	org_weights = []
	for n,p in cmodel.named_parameters():
		if n in test_layers_1: org_weights.append(p)

	model.compile(method='binary', verbose=0, initializers=initializers_1)
	init_weights = []
	for n,p in model.named_parameters():
		if n in test_layers_1: init_weights.append(p)

	res = all([torch.all((1-(a==b).int()).bool()) for a,b in zip(org_weights, init_weights)])
	assert res
Example #6
def test_warning_when_missing_initializer():

    wide = Wide(100, 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    model = WideDeep(wide=wide,
                     deeptabular=deeptabular,
                     deeptext=deeptext,
                     pred_dim=1)
    with pytest.warns(UserWarning):
        trainer = Trainer(  # noqa: F841
            model,
            objective="binary",
            verbose=True,
            initializers=initializers_3)
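
The `initializers_3` fixture is not shown in this excerpt; judging by the test name, it presumably lacks an initializer for one of the model components, which is what makes the Trainer emit the UserWarning. A hedged illustration (the exact contents are assumed):

from pytorch_widedeep.initializers import KaimingNormal

# Hypothetical fixture: no entry for "deeptext", so the Trainer is expected to
# warn that this component keeps its default initialization.
initializers_3 = {
    "wide": KaimingNormal,
    "deeptabular": KaimingNormal,
}
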
Example #7
def test_initializers_1(initializers, test_layers):

    wide = Wide(np.unique(X_wide).shape[0], 1)
    deeptabular = TabMlp(
        mlp_hidden_dims=[32, 16],
        mlp_dropout=[0.5, 0.5],
        column_idx=column_idx,
        embed_input=embed_input,
        continuous_cols=colnames[-5:],
    )
    deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
    deepimage = DeepImage(pretrained=True)
    model = WideDeep(
        wide=wide,
        deeptabular=deeptabular,
        deeptext=deeptext,
        deepimage=deepimage,
        pred_dim=1,
    )
    cmodel = c(model)

    org_weights = []
    for n, p in cmodel.named_parameters():
        if n in test_layers:
            org_weights.append(p)

    trainer = Trainer(model,
                      objective="binary",
                      verbose=0,
                      initializers=initializers)
    init_weights = []
    for n, p in trainer.model.named_parameters():
        if n in test_layers:
            init_weights.append(p)

    res = all([
        torch.all((1 - (a == b).int()).bool())
        for a, b in zip(org_weights, init_weights)
    ])
    assert res
Example #8
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1, 100), (100, 48))
padded_sequences = np.hstack(
    (np.repeat(np.array([[0, 0]]), 100, axis=0), padded_sequences)
)
pretrained_embeddings = np.random.rand(1000, 64).astype("float32")
vocab_size = 1000


###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)


def test_deep_text():
    out = model1(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64


###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size, embed_matrix=pretrained_embeddings, padding_idx=0
)

Example #9
        column_idx=tab_preprocessor.column_idx,
        embed_input=tab_preprocessor.embeddings_input,
        continuous_cols=continuous_cols,
    )
    # # To use TabResnet as the deepdense component simply:
    # deepdense = TabResnet(
    #     blocks_dims=[64, 32],
    #     dropout=0.2,
    #     column_idx=tab_preprocessor.column_idx,
    #     embed_input=tab_preprocessor.embeddings_input,
    #     continuous_cols=continuous_cols,
    # )
    deeptext = DeepText(
        vocab_size=len(text_processor.vocab.itos),
        hidden_dim=64,
        n_layers=3,
        rnn_dropout=0.5,
        padding_idx=1,
        embed_matrix=text_processor.embedding_matrix,
    )
    deepimage = DeepImage(pretrained=True, head_hidden_dims=None)
    model = WideDeep(wide=wide,
                     deeptabular=deepdense,
                     deeptext=deeptext,
                     deepimage=deepimage)

    wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
    deep_opt = torch.optim.Adam(model.deeptabular.parameters())
    text_opt = RAdam(model.deeptext.parameters())
    img_opt = RAdam(model.deepimage.parameters())

    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
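
    # Not part of the excerpt above: a hedged sketch of how these per-component
    # optimizers and schedulers are typically handed to the Trainer as dicts
    # keyed by component name. The extra schedulers and the Trainer call below
    # are assumptions for illustration, not the original example code.
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
    text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
    img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=5)

    optimizers = {
        "wide": wide_opt,
        "deeptabular": deep_opt,
        "deeptext": text_opt,
        "deepimage": img_opt,
    }
    lr_schedulers = {
        "wide": wide_sch,
        "deeptabular": deep_sch,
        "deeptext": text_sch,
        "deepimage": img_sch,
    }

    trainer = Trainer(
        model,
        objective="binary",
        optimizers=optimizers,
        lr_schedulers=lr_schedulers,
    )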
Example #10
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1,100), (100, 48))
padded_sequences = np.hstack((np.repeat(np.array([[0,0]]), 100, axis=0), padded_sequences))
pretrained_embeddings = np.random.rand(1000, 64)
vocab_size = 1000

###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(
    vocab_size=vocab_size,
    embed_dim=32,
    padding_idx=0
    )
def test_deep_test():
	out = model1(torch.from_numpy(padded_sequences))
	assert out.size(0)==100 and out.size(1)==64

###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size,
    embedding_matrix=pretrained_embeddings,
    padding_idx=0
    )
def test_deep_test_pretrained():
    # body not shown in this excerpt; assumed to mirror test_deep_test above
    out = model2(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64
Example #11
    X_img_val,
    y_train,
    y_val,
) = train_test_split(X_wide, X_deep, X_text, X_img, target)

# build model components
wide = Wide(np.unique(X_wide).shape[0], 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx={k: v
                     for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)

# transforms
mean = [0.406, 0.456, 0.485]  # BGR
std = [0.225, 0.224, 0.229]  # BGR
transforms1 = [ToTensor, Normalize(mean=mean, std=std)]
transforms2 = [Normalize(mean=mean, std=std)]


##############################################################################
# Test many possible scenarios of data inputs I can think of. Surely users
# will input something unexpected
##############################################################################
@pytest.mark.parametrize(
    "X_wide, X_deep, X_text, X_img, X_train, X_val, target, val_split, transforms, nepoch, null",
Example #12
import pytest

from pytorch_widedeep.models import (
    Wide,
    DeepText,
    WideDeep,
    DeepDense,
    DeepImage,
)

embed_input = [(u, i, j)
               for u, i, j in zip(["a", "b", "c"][:4], [4] * 3, [8] * 3)]
deep_column_idx = {k: v for v, k in enumerate(["a", "b", "c"])}
wide = Wide(10, 1)
deepdense = DeepDense(hidden_layers=[16, 8],
                      deep_column_idx=deep_column_idx,
                      embed_input=embed_input)
deeptext = DeepText(vocab_size=100, embed_dim=8)
deepimage = DeepImage(pretrained=False)

###############################################################################
#  test raising 'output dim errors'
###############################################################################


@pytest.mark.parametrize(
    "deepcomponent, component_name",
    [
        (None, "dense"),
        (deeptext, "text"),
        (deepimage, "image"),
    ],
)
Example #13
import numpy as np
import torch
import pytest

from pytorch_widedeep.models import DeepText

padded_sequences = np.random.choice(np.arange(1, 100), (100, 48))
padded_sequences = np.hstack(
    (np.repeat(np.array([[0, 0]]), 100, axis=0), padded_sequences)
)
pretrained_embeddings = np.random.rand(1000, 64).astype("float32")
vocab_size = 1000


###############################################################################
# Without Pretrained Embeddings
###############################################################################
model1 = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)


def test_deep_text():
    out = model1(torch.from_numpy(padded_sequences))
    assert out.size(0) == 100 and out.size(1) == 64


###############################################################################
# With Pretrained Embeddings
###############################################################################
model2 = DeepText(
    vocab_size=vocab_size, embedding_matrix=pretrained_embeddings, padding_idx=0
)