def set_model(args, prepare_tab):

    if args.blocks_dims == "same":
        n_inp_dim = sum([e[2] for e in prepare_tab.embeddings_input])
        blocks_dims = [n_inp_dim, n_inp_dim, n_inp_dim]
    else:
        blocks_dims = eval(args.blocks_dims)

    if args.mlp_hidden_dims == "auto":
        n_inp_dim = blocks_dims[-1]
        mlp_hidden_dims = [4 * n_inp_dim, 2 * n_inp_dim]
    else:
        mlp_hidden_dims = eval(args.mlp_hidden_dims)

    deeptabular = TabResnet(
        embed_input=prepare_tab.embeddings_input,
        column_idx=prepare_tab.column_idx,
        blocks_dims=blocks_dims,
        blocks_dropout=args.blocks_dropout,
        mlp_hidden_dims=mlp_hidden_dims,
        mlp_activation=args.mlp_activation,
        mlp_dropout=args.mlp_dropout,
        mlp_batchnorm=args.mlp_batchnorm,
        mlp_batchnorm_last=args.mlp_batchnorm_last,
        mlp_linear_first=args.mlp_linear_first,
        embed_dropout=args.embed_dropout,
    )
    model = WideDeep(deeptabular=deeptabular)

    return model
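For context, a minimal usage sketch of how a model returned by set_model is typically trained with pytorch_widedeep's Trainer; args, prepare_tab, X_tab and target are assumed to exist and are not part of the snippet above.

from pytorch_widedeep import Trainer

# sketch only: assumed objects are args, prepare_tab, X_tab (tabular matrix), target (labels)
model = set_model(args, prepare_tab)
trainer = Trainer(model, objective="binary")
trainer.fit(X_tab=X_tab, target=target, n_epochs=5, batch_size=256)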
Example #2
def test_full_setup(concat_cont_first):
    model4 = TabResnet(
        embed_input=embed_input,
        column_idx={k: v
                    for v, k in enumerate(colnames)},
        blocks_dims=[32, 16, 8],
        blocks_dropout=0.5,
        mlp_dropout=0.5,
        mlp_hidden_dims=[32, 16],
        mlp_batchnorm=True,
        mlp_batchnorm_last=False,
        embed_dropout=0.1,
        continuous_cols=continuous_cols,
        batchnorm_cont=True,
        concat_cont_first=concat_cont_first,
    )
    out = model4(X_tab)

    true_mlp_inp_dim = list(
        model4.tab_resnet_mlp.mlp.dense_layer_0.parameters())[2].size(1)

    if concat_cont_first:
        expected_mlp_inp_dim = model4.blocks_dims[-1]
    else:
        expected_mlp_inp_dim = model4.blocks_dims[-1] + len(continuous_cols)

    assert (out.size(0) == 10 and out.size(1) == model4.output_dim
            and expected_mlp_inp_dim == true_mlp_inp_dim)
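test_full_setup above (and test_cont_contat in the next example) receives concat_cont_first as an argument; the driver is not shown in these snippets, but a typical pytest parametrization would look like the sketch below (the decorator is an assumption, not copied from the source).

import pytest

# assumed parametrization that supplies concat_cont_first to the tests
@pytest.mark.parametrize("concat_cont_first", [True, False])
def test_full_setup(concat_cont_first):
    ...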
Example #3
def test_cont_contat(concat_cont_first):
    model3 = TabResnet(
        blocks_dims=[32, 16, 8],
        blocks_dropout=0.5,
        mlp_dropout=0.5,
        column_idx={k: v
                    for v, k in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=continuous_cols,
        concat_cont_first=concat_cont_first,
    )
    out = model3(X_tab)

    assert out.size(0) == 10 and out.size(1) == model3.output_dim
Example #4
    blocks_dims = eval(args.blocks_dims)

if args.mlp_hidden_dims == "auto":
    n_inp_dim = blocks_dims[-1]
    mlp_hidden_dims = [4 * n_inp_dim, 2 * n_inp_dim]
else:
    mlp_hidden_dims = eval(args.mlp_hidden_dims)

deeptabular = TabResnet(
    embed_input=prepare_tab.embeddings_input,
    column_idx=prepare_tab.column_idx,
    blocks_dims=blocks_dims,
    blocks_dropout=args.blocks_dropout,
    mlp_hidden_dims=mlp_hidden_dims,
    mlp_activation=args.mlp_activation,
    mlp_dropout=args.mlp_dropout,
    mlp_batchnorm=args.mlp_batchnorm,
    mlp_batchnorm_last=args.mlp_batchnorm_last,
    mlp_linear_first=args.mlp_linear_first,
    embed_dropout=args.embed_dropout,
    continuous_cols=prepare_tab.continuous_cols,
    batchnorm_cont=args.batchnorm_cont,
    concat_cont_first=args.concat_cont_first,
)
model = WideDeep(deeptabular=deeptabular)

optimizers = set_optimizer(model, args)

steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
early_stopping = EarlyStopping(
    monitor=args.monitor,
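The example is cut off inside the EarlyStopping call. Purely as a hedged sketch (not the continuation of the original script; y_train, args.n_epochs and the completed callback arguments are assumptions), such pieces are commonly wired into pytorch_widedeep's Trainer as follows:

from pytorch_widedeep import Trainer

# assumed completion of the truncated callback, for illustration only
early_stopping = EarlyStopping(monitor=args.monitor)

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    callbacks=[early_stopping],
)
trainer.fit(X_tab=X_train, target=y_train, n_epochs=args.n_epochs, batch_size=args.batch_size)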
Example #5
colnames = list(string.ascii_lowercase)[:10]
embed_cols = [np.random.choice(np.arange(5), 10) for _ in range(5)]
cont_cols = [np.random.rand(10) for _ in range(5)]

# 10 rows x 10 columns: the first 5 columns are categorical, the last 5 continuous
X_tab = torch.from_numpy(np.vstack(embed_cols + cont_cols).transpose())
X_tab_emb = X_tab[:, :5]
X_tab_cont = X_tab[:, 5:]

###############################################################################
# Embeddings and no continuous_cols
###############################################################################
# embed_input tuples are (column_name, number_of_categories, embedding_dim)
embed_input = [(u, i, j) for u, i, j in zip(colnames[:5], [5] * 5, [16] * 5)]
model1 = TabResnet(
    blocks_dims=[32, 16],
    blocks_dropout=0.5,
    mlp_dropout=0.5,
    column_idx={k: v
                for v, k in enumerate(colnames[:5])},
    embed_input=embed_input,
)


def test_tab_resnet_embed():
    out = model1(X_tab_emb)
    assert out.size(0) == 10 and out.size(1) == 16


###############################################################################
# Continuous Cols and Embeddings
###############################################################################
continuous_cols = colnames[-5:]
model2 = TabResnet(
Example #6
) = train_test_split(X_wide, X_tab, X_text, X_img, target)

# build model components
wide = Wide(np.unique(X_wide).shape[0], 1)
tabmlp = TabMlp(
    mlp_hidden_dims=[32, 16],
    mlp_dropout=[0.5, 0.5],
    column_idx={k: v
                for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
tabresnet = TabResnet(
    blocks_dims=[32, 16],
    column_idx={k: v
                for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
tabtransformer = TabTransformer(
    column_idx={k: v
                for v, k in enumerate(colnames)},
    embed_input=embed_input_tt,
    continuous_cols=colnames[5:],
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)

###############################################################################
#  test consistency between optimizers and lr_schedulers format
###############################################################################
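The listing ends at this banner. As a rough sketch of what such consistency checks exercise, per-component optimizers and schedulers are passed to the Trainer as dictionaries keyed by the WideDeep component names; the concrete optimizers, schedulers and hyperparameters below are assumptions, not taken from the original test file.

import torch
from pytorch_widedeep import Trainer

# sketch: combine the components built above into a WideDeep model
model = WideDeep(wide=wide, deeptabular=tabresnet, deeptext=deeptext, deepimage=deepimage)

wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = torch.optim.AdamW(model.deeptabular.parameters(), lr=0.001)
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

# both dictionaries use the same component keys ("wide", "deeptabular", ...)
optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
lr_schedulers = {"wide": wide_sch, "deeptabular": deep_sch}

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
)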