Example #1
import torch

# RAdam is assumed to be available in scope (e.g. torch.optim.RAdam in
# PyTorch >= 1.10, or an RAdam implementation shipped with the project).


def set_optimizer(model, args):
    if args.optimizer.lower() == "adam":
        return torch.optim.Adam(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer.lower() == "adamw":
        return torch.optim.AdamW(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer.lower() == "radam":
        return RAdam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == "usedefault":
        return None
    else:
        raise ValueError("Only Adam, AdamW and RAdam are supported for this experiment")
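# Usage sketch (not part of the original snippet): set_optimizer only needs an
# object exposing .optimizer, .lr and .weight_decay attributes. The argparse
# defaults and the stand-in model below are illustrative assumptions.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--optimizer", type=str, default="adamw")
parser.add_argument("--lr", type=float, default=1e-3)
parser.add_argument("--weight_decay", type=float, default=0.0)
args = parser.parse_args([])  # empty list -> keep the defaults

toy_model = torch.nn.Linear(10, 1)  # any nn.Module works here
optimizer = set_optimizer(toy_model, args)  # -> a torch.optim.AdamW instance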
Example #2
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(100, 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx=deep_column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deepdense=deepdense)

wide_opt_1 = torch.optim.Adam(model.wide.parameters())
deep_opt_1 = torch.optim.Adam(model.deepdense.parameters())
wide_sch_1 = StepLR(wide_opt_1, step_size=4)
deep_sch_1 = CyclicLR(
    deep_opt_1, base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False
)
optimizers_1 = {'wide': wide_opt_1, 'deepdense': deep_opt_1}
lr_schedulers_1 = {'wide': wide_sch_1, 'deepdense': deep_sch_1}

wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deepdense.parameters())
wide_sch_2 = StepLR(wide_opt_2, step_size=4)
deep_sch_2 = StepLR(deep_opt_2, step_size=4)
optimizers_2 = {'wide': wide_opt_2, 'deepdense': deep_opt_2}
lr_schedulers_2 = {'wide': wide_sch_2, 'deepdense': deep_sch_2}


@pytest.mark.parametrize(
    'optimizers, schedulers, len_loss_output, len_lr_output',
    [
        (optimizers_1, lr_schedulers_1, 5, 21),
        (optimizers_2, lr_schedulers_2, 5, 5),
    ],
)
def test_history_callback(optimizers, schedulers, len_loss_output, len_lr_output):
    model.compile(method='binary', optimizers=optimizers, lr_schedulers=schedulers,
Example #3
        vocab_size=len(text_processor.vocab.itos),
        hidden_dim=64,
        n_layers=3,
        rnn_dropout=0.5,
        padding_idx=1,
        embed_matrix=text_processor.embedding_matrix,
    )
    deepimage = DeepImage(pretrained=True, head_hidden_dims=None)
    model = WideDeep(wide=wide,
                     deeptabular=deepdense,
                     deeptext=deeptext,
                     deepimage=deepimage)

    wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
    deep_opt = torch.optim.Adam(model.deeptabular.parameters())
    text_opt = RAdam(model.deeptext.parameters())
    img_opt = RAdam(model.deepimage.parameters())

    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
    text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
    img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)

    optimizers = {
        "wide": wide_opt,
        "deeptabular": deep_opt,
        "deeptext": text_opt,
        "deepimage": img_opt,
    }
    schedulers = {
        "wide": wide_sch,
Example #4
        embed_input=prepare_deep.embeddings_input,
        continuous_cols=continuous_cols,
    )

    # To use TabResnet as the deeptabular component simply:
    # deeptabular = TabResnet(
    #     blocks_dims=[200, 100],
    #     column_idx=prepare_deep.column_idx,
    #     embed_input=prepare_deep.embeddings_input,
    #     continuous_cols=continuous_cols,
    # )

    model = WideDeep(wide=wide, deeptabular=deeptabular)

    wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
    deep_opt = RAdam(model.deeptabular.parameters())
    wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
    deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

    optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
    schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
    initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
    callbacks = [
        LRHistory(n_epochs=10),
        EarlyStopping(patience=5),
        ModelCheckpoint(filepath="model_weights/wd_out"),
    ]
    metrics = [Accuracy, Precision]

    trainer = Trainer(
        model,
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(np.unique(X_wide).shape[0], 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx=deep_column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deepdense=deepdense)

# 1. A single optimizer and a single non-cyclic scheduler, both passed directly
optimizers_1 = RAdam(model.parameters())
lr_schedulers_1 = StepLR(optimizers_1, step_size=4)

# 2. Multiple optimizers and a single cyclic scheduler, the latter passed via a
# one-item dictionary
wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deepdense.parameters())
deep_sch_2 = CyclicLR(deep_opt_2,
                      base_lr=0.001,
                      max_lr=0.01,
                      step_size_up=5,
                      cycle_momentum=False)
optimizers_2 = {"wide": wide_opt_2, "deepdense": deep_opt_2}
lr_schedulers_2 = {"deepdense": deep_sch_2}

# 3. Multiple optimizers and multiple non-cyclic schedulers
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(np.unique(X_wide).shape[0], 1)
deeptabular = TabMlp(
    mlp_hidden_dims=[32, 16],
    mlp_dropout=[0.5, 0.5],
    column_idx=column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deeptabular=deeptabular)

# 1. A single optimizer and a single non-cyclic scheduler, both passed directly
optimizers_1 = RAdam(model.parameters())
lr_schedulers_1 = StepLR(optimizers_1, step_size=4)

# 2. Multiple optimizers and a single cyclic scheduler, the latter passed via a
# one-item dictionary
wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deeptabular.parameters())
deep_sch_2 = CyclicLR(
    deep_opt_2, base_lr=0.001, max_lr=0.01, step_size_up=5, cycle_momentum=False
)
optimizers_2 = {"wide": wide_opt_2, "deeptabular": deep_opt_2}
lr_schedulers_2 = {"deeptabular": deep_sch_2}

# 3. Multiple optimizers and multiple non-cyclic schedulers
wide_opt_3 = torch.optim.Adam(model.wide.parameters())
deep_opt_3 = RAdam(model.deeptabular.parameters())