def set_optimizer(model, args):
    if args.optimizer.lower() == "adam":
        return torch.optim.Adam(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer.lower() == "adamw":
        return torch.optim.AdamW(
            model.parameters(), lr=args.lr, weight_decay=args.weight_decay
        )
    elif args.optimizer.lower() == "radam":
        return RAdam(model.parameters(), lr=args.lr, weight_decay=args.weight_decay)
    elif args.optimizer.lower() == "usedefault":
        return None
    else:
        raise ValueError(
            "Only Adam, AdamW and RAdam are supported for this experiment"
        )
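# A minimal usage sketch for set_optimizer (not part of the original script):
# the `args` namespace below is hypothetical and only stands in for whatever
# argument parser the experiment actually uses, and the toy nn.Linear model is
# just a stand-in for any module exposing .parameters().
import argparse

import torch
import torch.nn as nn

args = argparse.Namespace(optimizer="adamw", lr=1e-3, weight_decay=1e-5)
toy_model = nn.Linear(10, 1)

optimizer = set_optimizer(toy_model, args)
if optimizer is None:
    # "usedefault" signals the caller to fall back to its own default optimizer
    optimizer = torch.optim.Adam(toy_model.parameters(), lr=args.lr)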
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(100, 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx=deep_column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deepdense=deepdense)

wide_opt_1 = torch.optim.Adam(model.wide.parameters())
deep_opt_1 = torch.optim.Adam(model.deepdense.parameters())
wide_sch_1 = StepLR(wide_opt_1, step_size=4)
deep_sch_1 = CyclicLR(
    deep_opt_1, base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False
)
optimizers_1 = {"wide": wide_opt_1, "deepdense": deep_opt_1}
lr_schedulers_1 = {"wide": wide_sch_1, "deepdense": deep_sch_1}

wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deepdense.parameters())
wide_sch_2 = StepLR(wide_opt_2, step_size=4)
deep_sch_2 = StepLR(deep_opt_2, step_size=4)
optimizers_2 = {"wide": wide_opt_2, "deepdense": deep_opt_2}
lr_schedulers_2 = {"wide": wide_sch_2, "deepdense": deep_sch_2}


@pytest.mark.parametrize(
    "optimizers, schedulers, len_loss_output, len_lr_output",
    [
        (optimizers_1, lr_schedulers_1, 5, 21),
        (optimizers_2, lr_schedulers_2, 5, 5),
    ],
)
def test_history_callback(optimizers, schedulers, len_loss_output, len_lr_output):
    model.compile(
        method="binary",
        optimizers=optimizers,
        lr_schedulers=schedulers,
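# The contrast in the expected lengths above (21 recorded learning rates for the
# CyclicLR pairing vs 5 for the all-StepLR pairing) most likely reflects the
# usual stepping convention: cyclic schedulers are stepped once per batch while
# StepLR is stepped once per epoch, so a history callback that logs the learning
# rate at every scheduler step sees many more entries in the cyclic case. Below
# is a library-agnostic sketch of that convention in plain PyTorch; it is not
# the test's actual code, and the epoch/batch counts are illustrative only.
import torch
from torch.optim.lr_scheduler import CyclicLR, StepLR

param = [torch.nn.Parameter(torch.zeros(1))]
opt_epoch = torch.optim.SGD(param, lr=0.01)
opt_batch = torch.optim.SGD(param, lr=0.01)
sch_epoch = StepLR(opt_epoch, step_size=4)
sch_batch = CyclicLR(
    opt_batch, base_lr=0.001, max_lr=0.01, step_size_up=10, cycle_momentum=False
)

lrs_epoch, lrs_batch = [], []
for epoch in range(5):
    for batch in range(4):
        # optimizer.step() would go here in a real training loop
        sch_batch.step()
        lrs_batch.append(sch_batch.get_last_lr()[0])
    sch_epoch.step()
    lrs_epoch.append(sch_epoch.get_last_lr()[0])

print(len(lrs_epoch), len(lrs_batch))  # 5 per-epoch entries vs 20 per-batch entries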
    vocab_size=len(text_processor.vocab.itos),
    hidden_dim=64,
    n_layers=3,
    rnn_dropout=0.5,
    padding_idx=1,
    embed_matrix=text_processor.embedding_matrix,
)
deepimage = DeepImage(pretrained=True, head_hidden_dims=None)
model = WideDeep(
    wide=wide, deeptabular=deepdense, deeptext=deeptext, deepimage=deepimage
)

wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = torch.optim.Adam(model.deeptabular.parameters())
text_opt = RAdam(model.deeptext.parameters())
img_opt = RAdam(model.deepimage.parameters())

wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=5)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=3)
text_sch = torch.optim.lr_scheduler.StepLR(text_opt, step_size=5)
img_sch = torch.optim.lr_scheduler.StepLR(img_opt, step_size=3)

optimizers = {
    "wide": wide_opt,
    "deeptabular": deep_opt,
    "deeptext": text_opt,
    "deepimage": img_opt,
}
schedulers = {
    "wide": wide_sch,
    embed_input=prepare_deep.embeddings_input,
    continuous_cols=continuous_cols,
)
# To use TabResnet as the deeptabular component simply:
# deeptabular = TabResnet(
#     blocks_dims=[200, 100],
#     column_idx=prepare_deep.column_idx,
#     embed_input=prepare_deep.embeddings_input,
#     continuous_cols=continuous_cols,
# )
model = WideDeep(wide=wide, deeptabular=deeptabular)

wide_opt = torch.optim.Adam(model.wide.parameters(), lr=0.01)
deep_opt = RAdam(model.deeptabular.parameters())
wide_sch = torch.optim.lr_scheduler.StepLR(wide_opt, step_size=3)
deep_sch = torch.optim.lr_scheduler.StepLR(deep_opt, step_size=5)

optimizers = {"wide": wide_opt, "deeptabular": deep_opt}
schedulers = {"wide": wide_sch, "deeptabular": deep_sch}
initializers = {"wide": KaimingNormal, "deeptabular": XavierNormal}
callbacks = [
    LRHistory(n_epochs=10),
    EarlyStopping(patience=5),
    ModelCheckpoint(filepath="model_weights/wd_out"),
]
metrics = [Accuracy, Precision]

trainer = Trainer(
    model,
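# The Trainer call above is truncated in this excerpt. A hedged sketch of how
# the example typically continues, kept commented out because X_wide, X_tab and
# target are built earlier in the full script and are not available here. The
# argument names (objective, optimizers, lr_schedulers, initializers, callbacks,
# metrics, n_epochs, batch_size, val_split) follow the documented
# pytorch-widedeep Trainer / Trainer.fit API, but the exact values are
# illustrative and should be checked against the installed version:
#
# trainer = Trainer(
#     model,
#     objective="binary",
#     optimizers=optimizers,
#     lr_schedulers=schedulers,
#     initializers=initializers,
#     callbacks=callbacks,
#     metrics=metrics,
# )
# trainer.fit(
#     X_wide=X_wide,
#     X_tab=X_tab,
#     target=target,
#     n_epochs=10,
#     batch_size=256,
#     val_split=0.1,
# )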
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(np.unique(X_wide).shape[0], 1)
deepdense = DeepDense(
    hidden_layers=[32, 16],
    dropout=[0.5, 0.5],
    deep_column_idx=deep_column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deepdense=deepdense)

# 1. Single optimizer (optimizers_1), single scheduler, not cyclic, both passed directly
optimizers_1 = RAdam(model.parameters())
lr_schedulers_1 = StepLR(optimizers_1, step_size=4)

# 2. Multiple optimizers, single scheduler, cyclic, passed via a 1-item
# dictionary
wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deepdense.parameters())
deep_sch_2 = CyclicLR(
    deep_opt_2, base_lr=0.001, max_lr=0.01, step_size_up=5, cycle_momentum=False
)
optimizers_2 = {"wide": wide_opt_2, "deepdense": deep_opt_2}
lr_schedulers_2 = {"deepdense": deep_sch_2}

# 3. Multiple schedulers, no cyclic
###############################################################################
# Test that history saves the information adequately
###############################################################################
wide = Wide(np.unique(X_wide).shape[0], 1)
deeptabular = TabMlp(
    mlp_hidden_dims=[32, 16],
    mlp_dropout=[0.5, 0.5],
    column_idx=column_idx,
    embed_input=embed_input,
    continuous_cols=colnames[-5:],
)
model = WideDeep(wide=wide, deeptabular=deeptabular)

# 1. Single optimizer (optimizers_1), single scheduler, not cyclic, both passed directly
optimizers_1 = RAdam(model.parameters())
lr_schedulers_1 = StepLR(optimizers_1, step_size=4)

# 2. Multiple optimizers, single scheduler, cyclic, passed via a 1-item
# dictionary
wide_opt_2 = torch.optim.Adam(model.wide.parameters())
deep_opt_2 = RAdam(model.deeptabular.parameters())
deep_sch_2 = CyclicLR(
    deep_opt_2, base_lr=0.001, max_lr=0.01, step_size_up=5, cycle_momentum=False
)
optimizers_2 = {"wide": wide_opt_2, "deeptabular": deep_opt_2}
lr_schedulers_2 = {"deeptabular": deep_sch_2}

# 3. Multiple schedulers, no cyclic
wide_opt_3 = torch.optim.Adam(model.wide.parameters())
deep_opt_3 = RAdam(model.deeptabular.parameters())
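# Case 3 is cut off in this excerpt. A hypothetical continuation, mirroring the
# pattern of cases 1 and 2 above (the step_size values are illustrative, not
# taken from the original test):
#
# wide_sch_3 = StepLR(wide_opt_3, step_size=4)
# deep_sch_3 = StepLR(deep_opt_3, step_size=4)
# optimizers_3 = {"wide": wide_opt_3, "deeptabular": deep_opt_3}
# lr_schedulers_3 = {"wide": wide_sch_3, "deeptabular": deep_sch_3}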