from pytorch_widedeep.models import TabResnet, WideDeep


def set_model(args, prepare_tab):
    # "same" keeps every resnet block at the width of the concatenated
    # embeddings; otherwise the CLI string is parsed as a list literal
    if args.blocks_dims == "same":
        n_inp_dim = sum([e[2] for e in prepare_tab.embeddings_input])
        blocks_dims = [n_inp_dim, n_inp_dim, n_inp_dim]
    else:
        blocks_dims = eval(args.blocks_dims)

    # "auto" sizes the MLP head from the last block's output dimension
    if args.mlp_hidden_dims == "auto":
        n_inp_dim = blocks_dims[-1]
        mlp_hidden_dims = [4 * n_inp_dim, 2 * n_inp_dim]
    else:
        mlp_hidden_dims = eval(args.mlp_hidden_dims)

    deeptabular = TabResnet(
        embed_input=prepare_tab.embeddings_input,
        column_idx=prepare_tab.column_idx,
        blocks_dims=blocks_dims,
        blocks_dropout=args.blocks_dropout,
        mlp_hidden_dims=mlp_hidden_dims,
        mlp_activation=args.mlp_activation,
        mlp_dropout=args.mlp_dropout,
        mlp_batchnorm=args.mlp_batchnorm,
        mlp_batchnorm_last=args.mlp_batchnorm_last,
        mlp_linear_first=args.mlp_linear_first,
        embed_dropout=args.embed_dropout,
    )
    model = WideDeep(deeptabular=deeptabular)
    return model
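# Usage sketch (not from the source script): `set_model` only needs an
# argparse-style Namespace and a fitted preprocessor exposing
# `embeddings_input` and `column_idx`. All values below are illustrative
# assumptions. Note the list-valued options are strings because the script
# parses them with `eval`; `ast.literal_eval` would be a safer equivalent.
from argparse import Namespace

args = Namespace(
    blocks_dims="same",      # or a list literal string, e.g. "[64, 32, 16]"
    mlp_hidden_dims="auto",  # or e.g. "[128, 64]"
    blocks_dropout=0.1,
    mlp_activation="relu",
    mlp_dropout=0.1,
    mlp_batchnorm=False,
    mlp_batchnorm_last=False,
    mlp_linear_first=True,
    embed_dropout=0.1,
)
# model = set_model(args, prepare_tab)  # prepare_tab: a fitted tabular preprocessor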
# parametrization assumed from the bare `concat_cont_first` argument
@pytest.mark.parametrize("concat_cont_first", [True, False])
def test_full_setup(concat_cont_first):
    model4 = TabResnet(
        embed_input=embed_input,
        column_idx={k: v for v, k in enumerate(colnames)},
        blocks_dims=[32, 16, 8],
        blocks_dropout=0.5,
        mlp_dropout=0.5,
        mlp_hidden_dims=[32, 16],
        mlp_batchnorm=True,
        mlp_batchnorm_last=False,
        embed_dropout=0.1,
        continuous_cols=continuous_cols,
        batchnorm_cont=True,
        concat_cont_first=concat_cont_first,
    )
    out = model4(X_tab)
    # with mlp_batchnorm=True, parameters 0-1 belong to the batchnorm, so
    # index 2 is the first Linear weight; its size(1) is the MLP input dim
    true_mlp_inp_dim = list(
        model4.tab_resnet_mlp.mlp.dense_layer_0.parameters()
    )[2].size(1)
    if concat_cont_first:
        # continuous cols were concatenated before the resnet blocks
        expected_mlp_inp_dim = model4.blocks_dims[-1]
    else:
        # continuous cols bypass the blocks and join at the MLP input
        expected_mlp_inp_dim = model4.blocks_dims[-1] + len(continuous_cols)
    assert (
        out.size(0) == 10
        and out.size(1) == model4.output_dim
        and expected_mlp_inp_dim == true_mlp_inp_dim
    )
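# Sketch of a less positional way to read the MLP's input size than indexing
# parameters() as the test above does (assumes only that dense_layer_0
# contains an nn.Linear; `first_linear_in_features` is a hypothetical helper):
import torch.nn as nn


def first_linear_in_features(module: nn.Module) -> int:
    # walk the submodules and return in_features of the first Linear found
    return next(m for m in module.modules() if isinstance(m, nn.Linear)).in_features


# first_linear_in_features(model4.tab_resnet_mlp.mlp.dense_layer_0)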
# parametrization assumed from the bare `concat_cont_first` argument
@pytest.mark.parametrize("concat_cont_first", [True, False])
def test_cont_concat(concat_cont_first):
    model3 = TabResnet(
        blocks_dims=[32, 16, 8],
        blocks_dropout=0.5,
        mlp_dropout=0.5,
        column_idx={k: v for v, k in enumerate(colnames)},
        embed_input=embed_input,
        continuous_cols=continuous_cols,
        concat_cont_first=concat_cont_first,
    )
    out = model3(X_tab)
    assert out.size(0) == 10 and out.size(1) == model3.output_dim
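# What concat_cont_first toggles in the two tests above, schematically
# (an illustration inferred from the assertions, not the library's code):
#
#   concat_cont_first=True:  mlp( blocks( cat(embeddings, continuous) ) )
#   concat_cont_first=False: mlp( cat( blocks(embeddings), continuous ) )
#
# which is why the expected MLP input dim gains len(continuous_cols)
# only when concat_cont_first is False.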
blocks_dims = eval(args.blocks_dims)

if args.mlp_hidden_dims == "auto":
    n_inp_dim = blocks_dims[-1]
    mlp_hidden_dims = [4 * n_inp_dim, 2 * n_inp_dim]
else:
    mlp_hidden_dims = eval(args.mlp_hidden_dims)

deeptabular = TabResnet(
    embed_input=prepare_tab.embeddings_input,
    column_idx=prepare_tab.column_idx,
    blocks_dims=blocks_dims,
    blocks_dropout=args.blocks_dropout,
    mlp_hidden_dims=mlp_hidden_dims,
    mlp_activation=args.mlp_activation,
    mlp_dropout=args.mlp_dropout,
    mlp_batchnorm=args.mlp_batchnorm,
    mlp_batchnorm_last=args.mlp_batchnorm_last,
    mlp_linear_first=args.mlp_linear_first,
    embed_dropout=args.embed_dropout,
    continuous_cols=prepare_tab.continuous_cols,
    batchnorm_cont=args.batchnorm_cont,
    concat_cont_first=args.concat_cont_first,
)
model = WideDeep(deeptabular=deeptabular)

optimizers = set_optimizer(model, args)
steps_per_epoch = (X_train.shape[0] // args.batch_size) + 1
lr_schedulers = set_lr_scheduler(optimizers, steps_per_epoch, args)
early_stopping = EarlyStopping(
    monitor=args.monitor,
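# Hedged sketch of how these pieces are typically wired together (the
# fragment above is truncated at the EarlyStopping call; everything below
# follows the pytorch_widedeep Trainer API and is illustrative, not the
# source script's exact code; X_train/y_train/args are assumed from context):
from pytorch_widedeep import Trainer
from pytorch_widedeep.metrics import Accuracy

trainer = Trainer(
    model,
    objective="binary",
    optimizers=optimizers,
    lr_schedulers=lr_schedulers,
    callbacks=[early_stopping],
    metrics=[Accuracy()],
)
trainer.fit(
    X_tab=X_train,
    target=y_train,
    n_epochs=args.n_epochs,  # assumed CLI arguments
    batch_size=args.batch_size,
)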
import string

import numpy as np
import pytest
import torch

from pytorch_widedeep.models import TabResnet

colnames = list(string.ascii_lowercase)[:10]
embed_cols = [np.random.choice(np.arange(5), 10) for _ in range(5)]
cont_cols = [np.random.rand(10) for _ in range(5)]
X_tab = torch.from_numpy(np.vstack(embed_cols + cont_cols).transpose())
X_tab_emb = X_tab[:, :5]
X_tab_cont = X_tab[:, 5:]

###############################################################################
# Embeddings and no continuous_cols
###############################################################################
embed_input = [(u, i, j) for u, i, j in zip(colnames[:5], [5] * 5, [16] * 5)]

model1 = TabResnet(
    blocks_dims=[32, 16],
    blocks_dropout=0.5,
    mlp_dropout=0.5,
    column_idx={k: v for v, k in enumerate(colnames[:5])},
    embed_input=embed_input,
)


def test_tab_resnet_embed():
    out = model1(X_tab_emb)
    assert out.size(0) == 10 and out.size(1) == 16


###############################################################################
# Continuous Cols and Embeddings
###############################################################################
continuous_cols = colnames[-5:]

model2 = TabResnet(
) = train_test_split(X_wide, X_tab, X_text, X_img, target)

# build model components
wide = Wide(np.unique(X_wide).shape[0], 1)
tabmlp = TabMlp(
    mlp_hidden_dims=[32, 16],
    mlp_dropout=[0.5, 0.5],
    column_idx={k: v for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
tabresnet = TabResnet(
    blocks_dims=[32, 16],
    column_idx={k: v for v, k in enumerate(colnames)},
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
tabtransformer = TabTransformer(
    column_idx={k: v for v, k in enumerate(colnames)},
    embed_input=embed_input_tt,
    continuous_cols=colnames[5:],
)
deeptext = DeepText(vocab_size=vocab_size, embed_dim=32, padding_idx=0)
deepimage = DeepImage(pretrained=True)

###############################################################################
# test consistency between optimizers and lr_schedulers format
###############################################################################
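# Hedged sketch of the format the tests below exercise (based on the
# documented pytorch_widedeep convention: when optimizers are passed as a
# dict keyed by WideDeep component name, any lr_schedulers dict must use the
# same keys; the model here is illustrative, not a test fixture):
import torch
from pytorch_widedeep.models import WideDeep

model = WideDeep(wide=wide, deeptabular=tabmlp)
wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = torch.optim.AdamW(model.deeptabular.parameters(), lr=0.001)
optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
lr_schedulers = {
    "wide": torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3),
    "deeptabular": torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5),
}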