clr = cyclical_lr(step_size, min_lr=end_lr/factor, max_lr=end_lr) #, decay_factor_per_step=.97)
print("Step-size: {}, lr: {} -> {}".format(step_size, end_lr/factor, end_lr))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
"""  # end of the commented-out cyclical-LR setup above

# optimizer and (optional) LR scheduler actually used for this run
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, amsgrad=True) #, weight_decay=1e-3)
#optimizer = torch.optim.SGD(model.parameters(), lr=.1, momentum=0.9)
lr_scheduler = None

#from models.components.criteria.SmoothedCrossEntropyLoss import SmoothedCrossEntropyLoss
#criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=0.9)
#criterion = SmoothedCrossEntropyLoss(label_smoothing=1.) # simple crossentropy, no ignore index set
#criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=1.) # simple crossentropy, with ignore index set
# NLL loss that skips padded target positions
criterion = nn.NLLLoss(ignore_index=tgt_lookup.convert_tokens_to_ids(tgt_lookup.pad_token))

train(model, train_loader, valid_loader, test_loader,
      model_store_path=os.path.join("..", "..", "train", "lstm_fa_pn"),
      resume=False,
      max_epochs=500,
      patience=35,
      optimizer=optimizer,
      lr_scheduler=lr_scheduler,
      tf_start_ratio=.5,
      tf_end_ratio=.1,
      tf_epochs_decay=50)

# ######################################################################
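# Hedged sketch (not this project's code): the cyclical_lr() factory referenced
# in the disabled block above is defined elsewhere in the repository and not
# shown here. A minimal triangular schedule compatible with LambdaLR could look
# roughly like this; note that LambdaLR multiplies the optimizer's base lr by
# the returned factor, so the base lr should be set to 1.0 for these values to
# be used verbatim.
import math

def cyclical_lr_sketch(step_size, min_lr=3e-4, max_lr=3e-3):
    def relative(it, step_size):
        # position within the current triangular cycle, in [0, 1]
        cycle = math.floor(1 + it / (2 * step_size))
        x = abs(it / step_size - 2 * cycle + 1)
        return max(0.0, 1.0 - x)
    # the returned factor oscillates linearly between min_lr and max_lr
    return lambda it: min_lr + (max_lr - min_lr) * relative(it, step_size)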
# keep only the first 300 validation examples
valid_loader.dataset.X = valid_loader.dataset.X[0:300]
valid_loader.dataset.y = valid_loader.dataset.y[0:300]

# model hyperparameters
n_class = len(src_w2i)
n_emb_dim = 300
n_hidden = 128
n_lstm_units = 2
n_lstm_dropout = 0.2
n_dropout = 0.3

model = LSTMAttnEncoderDecoder(n_class, n_emb_dim, n_hidden, n_lstm_units, n_lstm_dropout, n_dropout)

epochs = 500
lr = 0.1
train(model, epochs, lr, batch_size, n_class, train_loader, valid_loader, tgt_i2w)

# commented-out alternative configuration, kept for reference:
# embedding_dim = 512
# encoder_hidden_dim = 512
# decoder_hidden_dim = 512
# encoder_n_layers = 2
# decoder_n_layers = 1
# encoder_drop_prob = 0.3
# decoder_drop_prob = 0.3
# lr = 0.0005
#
# model = LSTMEncoderDecoderAtt(src_w2i, src_i2w, tgt_w2i, tgt_i2w, embedding_dim, encoder_hidden_dim,
#                               decoder_hidden_dim, encoder_n_layers, decoder_n_layers,
#                               encoder_drop_prob=encoder_drop_prob, decoder_drop_prob=decoder_drop_prob, lr=lr,
#                               model_store_path="../../train/lstm_att")
#
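# Hedged sketch (illustrative only): slicing valid_loader.dataset.X / .y above
# assumes the dataset exposes those attributes. A dataset-agnostic alternative
# is to wrap the map-style dataset in a Subset and build a new DataLoader;
# small_valid_loader is a hypothetical name, and a custom collate_fn, if the
# project uses one, would also need to be passed along.
from torch.utils.data import DataLoader, Subset

small_valid_dataset = Subset(valid_loader.dataset, range(300))
small_valid_loader = DataLoader(small_valid_dataset,
                                batch_size=valid_loader.batch_size,
                                shuffle=False)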
              dec_dropout=0.33,
              dec_lstm_dropout=0.33,
              dec_vocab_size=len(tgt_w2i),
              dec_attention_type="coverage",
              dec_transfer_hidden=True)

print("_" * 80 + "\n")
print(model)
print("_" * 80 + "\n")

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True) #, weight_decay=1e-3)
lr_scheduler = None

train(model, src_i2w, tgt_i2w, train_loader, valid_loader, test_loader,
      model_store_path=os.path.join("..", "..", "train", "lstm_selfattn"),
      resume=False,
      max_epochs=500,
      patience=25,
      optimizer=optimizer,
      lr_scheduler=lr_scheduler,
      tf_start_ratio=0.9,
      tf_end_ratio=0.1,
      tf_epochs_decay=50)

# ######################################################################
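# Hedged sketch (an assumption about train() internals, which are not shown
# here): the tf_start_ratio / tf_end_ratio / tf_epochs_decay arguments above
# suggest a teacher-forcing ratio annealed over the first tf_epochs_decay
# epochs. A simple linear decay consistent with those argument names:
def teacher_forcing_ratio_sketch(epoch, start_ratio=0.9, end_ratio=0.1, epochs_decay=50):
    # linearly interpolate from start_ratio to end_ratio, then hold at end_ratio
    if epoch >= epochs_decay:
        return end_ratio
    return start_ratio + (end_ratio - start_ratio) * (epoch / epochs_decay)

# At each decoding step the gold token would then be fed with this probability,
# e.g.: use_gold = random.random() < teacher_forcing_ratio_sketch(current_epoch)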
print("Step-size: {}, lr: {} -> {}".format(step_size, end_lr/factor, end_lr)) lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr]) """ optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True) #, weight_decay=1e-3) #optimizer = torch.optim.SGD(model.parameters(), lr=.1, momentum=0.9) lr_scheduler = None #from models.components.criteria.SmoothedCrossEntropyLoss import SmoothedCrossEntropyLoss #criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=0.9) #criterion = SmoothedCrossEntropyLoss(label_smoothing=1.) # simple crossentropy, no ignore index set #criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=1.) # simple crossentropy, with ignore index set criterion = nn.NLLLoss( ignore_index=tgt_lookup.convert_tokens_to_ids(tgt_lookup.pad_token)) train(model, train_loader, valid_loader, test_loader, model_store_path=os.path.join("..", "..", "train", "gpt2_lstm"), resume=False, max_epochs=400, patience=25, optimizer=optimizer, lr_scheduler=lr_scheduler, tf_start_ratio=1.0, tf_end_ratio=0.1, tf_epochs_decay=50) # ######################################################################