tgt_lookup.save_special_tokens(file_prefix=os.path.join(output_lookup_folder, "tgt"))
print("Done.")

# check everything is ok
lookup = Lookup(type="gpt2")
lookup.load(file_prefix=os.path.join(output_lookup_folder, "tgt"))
text = "This is a test."

token_ids = lookup.encode(text)
print("Encode: {}".format(token_ids))
recreated_string = lookup.decode(token_ids)
print("Decode: {}".format(recreated_string))

print("Map w2i:")
tokens = lookup.tokenize(text)
for i in range(len(tokens)):
    print("\t[{}] = [{}]".format(tokens[i], lookup.convert_tokens_to_ids(tokens[i])))

print("Map i2w:")
for i in range(len(token_ids)):
    print("\t[{}] = [{}]".format(token_ids[i], lookup.convert_ids_to_tokens(token_ids[i])))

token_ids = lookup.encode(text, add_bos_eos_tokens=True)
print("Encode with bos/eos: {}".format(token_ids))
recreated_string = lookup.decode(token_ids)
print("Decode with bos/eos: {}".format(recreated_string))
recreated_string = lookup.decode(token_ids, skip_bos_eos_tokens=True)
print("Decode w/o bos/eos: {}".format(recreated_string))
step_size = len(train_loader)
factor = 4
clr = cyclical_lr(step_size, min_lr=end_lr/factor, max_lr=end_lr)  # , decay_factor_per_step=.97)
print("Step-size: {}, lr: {} -> {}".format(step_size, end_lr/factor, end_lr))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
"""

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4, amsgrad=True)  # , weight_decay=1e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=.1, momentum=0.9)
lr_scheduler = None

# from models.components.criteria.SmoothedCrossEntropyLoss import SmoothedCrossEntropyLoss
# criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=0.9)
# criterion = SmoothedCrossEntropyLoss(label_smoothing=1.)  # simple crossentropy, no ignore index set
# criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=1.)  # simple crossentropy, with ignore index set
criterion = nn.NLLLoss(ignore_index=tgt_lookup.convert_tokens_to_ids(tgt_lookup.pad_token))

train(model,
      train_loader,
      valid_loader,
      test_loader,
      model_store_path=os.path.join("..", "..", "train", "lstm_fa_pn"),
      resume=False,
      max_epochs=500,
      patience=35,
      optimizer=optimizer,
      lr_scheduler=lr_scheduler,
      tf_start_ratio=.5,
      tf_end_ratio=.1,
      tf_epochs_decay=50)
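# NOTE: the cyclical-LR block above (left behind the closing """ of a commented-out section
# in the original script) calls a cyclical_lr() helper that is not shown in this excerpt.
# Below is a minimal sketch of what such a helper could look like; it is an assumption, not
# the repo's actual code. It builds a triangular schedule and returns a callable suitable for
# LambdaLR; under this usage the optimizer's base lr would be set to 1.0 so that the returned
# factor acts as the effective learning rate.
import math

def cyclical_lr(step_size, min_lr=3e-4, max_lr=3e-3):
    def lr_lambda(it):
        # position inside the current triangular cycle
        cycle = math.floor(1 + it / (2 * step_size))
        x = abs(it / step_size - 2 * cycle + 1)
        # linearly interpolate between min_lr and max_lr along the triangle wave
        return min_lr + (max_lr - min_lr) * max(0.0, 1.0 - x)
    return lr_lambda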
factor = 4
clr = cyclical_lr(step_size, min_lr=end_lr/factor, max_lr=end_lr)  # , decay_factor_per_step=.97)
print("Step-size: {}, lr: {} -> {}".format(step_size, end_lr/factor, end_lr))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
"""

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)  # , weight_decay=1e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=.1, momentum=0.9)
lr_scheduler = None

from models.components.criteria.SmoothedCrossEntropyLoss import SmoothedCrossEntropyLoss
# criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=0.9)
# criterion = SmoothedCrossEntropyLoss(label_smoothing=1.)  # simple crossentropy, no ignore index set
criterion = SmoothedCrossEntropyLoss(
    ignore_index=tgt_lookup.convert_tokens_to_ids(tgt_lookup.pad_token),
    label_smoothing=1.)  # simple crossentropy, with ignore index set

train(model,
      train_loader,
      valid_loader,
      test_loader,
      model_store_path=os.path.join("..", "..", "train", "lstm"),
      resume=False,
      max_epochs=400,
      patience=25,
      optimizer=optimizer,
      lr_scheduler=lr_scheduler,
      tf_start_ratio=1.0,
      tf_end_ratio=0.1,
      tf_epochs_decay=50)
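# SmoothedCrossEntropyLoss is part of this repo and is not reproduced in this excerpt. The
# inline comments suggest label_smoothing is the probability mass kept on the gold token, so
# label_smoothing=1. falls back to plain cross-entropy. Below is a minimal sketch of that
# reading, assuming the model emits log-probabilities (as the nn.NLLLoss variant above does);
# the class name is hypothetical and this is not the repo's implementation.
import torch
import torch.nn as nn

class LabelSmoothedNLL(nn.Module):
    def __init__(self, ignore_index=0, label_smoothing=0.9):
        super().__init__()
        self.ignore_index = ignore_index
        self.label_smoothing = label_smoothing

    def forward(self, log_probs, target):
        # log_probs: (batch, vocab_size), target: (batch,)
        n_classes = log_probs.size(-1)
        off_value = (1.0 - self.label_smoothing) / (n_classes - 1)
        # smoothed target distribution: label_smoothing on the gold token, rest spread uniformly
        true_dist = torch.full_like(log_probs, off_value)
        true_dist.scatter_(1, target.unsqueeze(1), self.label_smoothing)
        loss = -(true_dist * log_probs).sum(dim=-1)
        # ignore padding positions when averaging
        mask = target != self.ignore_index
        return loss[mask].mean()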
tgt_lookup.save_special_tokens(file_prefix=os.path.join(output_lookup_folder, "tgt"))
print("Done.")

# check everything is ok
lookup = Lookup(type="gpt2")
lookup.load(file_prefix=os.path.join(output_lookup_folder, "tgt"))
text = "This is a test."

token_ids = lookup.encode(text)
print("Encode: {}".format(token_ids))
recreated_string = lookup.decode(token_ids)
print("Decode: {}".format(recreated_string))

print("Map w2i:")
tokens = lookup.tokenize(text)
for i in range(len(tokens)):
    print("\t[{}] = [{}]".format(tokens[i], lookup.convert_tokens_to_ids(tokens[i])))

print("Map i2w:")
for i in range(len(token_ids)):
    print("\t[{}] = [{}]".format(token_ids[i], lookup.convert_ids_to_tokens(token_ids[i])))

token_ids = lookup.encode(text, add_bos_eos_tokens=True)
print("Encode with bos/eos: {}".format(token_ids))
recreated_string = lookup.decode(token_ids)
print("Decode with bos/eos: {}".format(recreated_string))
recreated_string = lookup.decode(token_ids, skip_bos_eos_tokens=True)
print("Decode w/o bos/eos: {}".format(recreated_string))
end_lr = 500.
step_size = len(train_loader)
factor = 4
clr = cyclical_lr(step_size, min_lr=end_lr/factor, max_lr=end_lr)  # , decay_factor_per_step=.97)
print("Step-size: {}, lr: {} -> {}".format(step_size, end_lr/factor, end_lr))
lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, [clr])
"""

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, amsgrad=True)  # , weight_decay=1e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=.1, momentum=0.9)
lr_scheduler = None

from models.components.criteria.SmoothedCrossEntropyLoss import SmoothedCrossEntropyLoss
# criterion = SmoothedCrossEntropyLoss(ignore_index=0, label_smoothing=0.9)
# criterion = SmoothedCrossEntropyLoss(label_smoothing=1.)  # simple crossentropy, no ignore index set
criterion = SmoothedCrossEntropyLoss(
    ignore_index=tgt_lookup.convert_tokens_to_ids(tgt_lookup.pad_token),
    label_smoothing=1.)  # simple crossentropy, with ignore index set

train(model,
      train_loader,
      valid_loader,
      test_loader,
      model_store_path=os.path.join("..", "..", "train", "lstm_fa"),
      resume=False,
      max_epochs=500,
      patience=30,
      optimizer=optimizer,
      lr_scheduler=lr_scheduler,
      tf_start_ratio=.5,
      tf_end_ratio=.0,
      tf_epochs_decay=50)
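# The tf_* arguments configure scheduled teacher forcing inside train(); the exact schedule
# lives in the training loop and is not shown in this excerpt. One plausible reading, assuming
# a linear decay from tf_start_ratio to tf_end_ratio over tf_epochs_decay epochs (hypothetical
# helper, not the repo's code):
def teacher_forcing_ratio(epoch, tf_start_ratio=.5, tf_end_ratio=.0, tf_epochs_decay=50):
    if epoch >= tf_epochs_decay:
        return tf_end_ratio
    return tf_start_ratio + (tf_end_ratio - tf_start_ratio) * epoch / tf_epochs_decay

# e.g. at each decoder step the gold token would be fed with probability
# teacher_forcing_ratio(epoch); otherwise the model's own prediction is fed back.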