def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-c', type=float, default=0.9)
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, required=True)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, required=True)
    parser.add_argument('-heads', type=int, default=4)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, required=True)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=1e-7)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    # opt.train1 = create_dataset1(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.999), eps=1e-8)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    train_model(model, opt, SRC, TRG)
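# Every variant in this file constructs CosineWithRestarts(optimizer, T_max=...)
# when -SGDR is set, but the class itself is defined elsewhere. The sketch below
# is a minimal plausible implementation of cosine annealing with warm restarts
# (SGDR, Loshchilov & Hutter) matching only the constructor signature used
# here; the eta_min parameter and the fixed-length cycles are assumptions, not
# taken from this codebase.
import math

from torch.optim.lr_scheduler import _LRScheduler


class CosineWithRestarts(_LRScheduler):
    """Cosine-anneal the LR, restarting from the base LR every T_max steps."""

    def __init__(self, optimizer, T_max, eta_min=0.0, last_epoch=-1):
        self.T_max = T_max
        self.eta_min = eta_min
        super().__init__(optimizer, last_epoch)

    def get_lr(self):
        t = self.last_epoch % self.T_max  # position within the current cycle
        return [self.eta_min + (base_lr - self.eta_min)
                * (1 + math.cos(math.pi * t / self.T_max)) / 2
                for base_lr in self.base_lrs]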
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', default="./data/in.txt")    # raw data: source file
    parser.add_argument('-trg_data', default="./data/out.txt")   # raw data: target file
    # note: with default=True a store_true flag can never be switched off from the CLI
    parser.add_argument('-cuda', default=True, action='store_true')  # use CUDA; otherwise the model runs on the CPU
    parser.add_argument('-epochs', type=int, default=100)        # number of epochs to train for (default 100)
    parser.add_argument('-d_model', type=int, default=512)       # dimensionality of the embeddings and layers (default 512)
    parser.add_argument('-n_layers', type=int, default=6)        # number of layers in the Transformer (default 6)
    parser.add_argument('-heads', type=int, default=8)           # number of attention heads to split into (default 8)
    parser.add_argument('-dropout', type=float, default=0.02)    # dropout rate (default 0.02)
    parser.add_argument('-batchsize', type=int, default=128)     # measured in tokens fed to the model per iteration (default 128)
    # parser.add_argument('-printevery', type=int, default=200)  # how many iterations to run between progress printouts
    parser.add_argument('-lr', type=float, default=0.0001)       # learning rate (default 0.0001)
    parser.add_argument('-premodels', default=False)             # whether to load previous weights and vocab
    parser.add_argument('-load_weights', default="weights")      # folder where pretrained weights and field pickles were saved
    parser.add_argument('-premodels_path', default="model_weights_s5")  # filename of the pretrained model
    parser.add_argument('-max_strlen', type=int, default=32)     # sentences with more words than this are excluded from the dataset (default 32)
    parser.add_argument('-SGDR', action='store_true')            # enables stochastic gradient descent with restarts, using cosine annealing
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-floyd', default=True, action='store_true')  # (purpose unclear in this fork)
    parser.add_argument('-checktime', type=int, default=60)      # how often, in minutes, weights are saved to the 'weights/' folder
    parser.add_argument('-checkpoint', type=int, default=10)     # every how many epochs weights are saved to the 'weights/' folder
    # -src_ -trg_data data/english1.txt -src_lang en -trg_lang en -floyd -checkpoint 15 -batchsize 3000 -epochs 10

    opt = parser.parse_args()
    opt.device = 0 if opt.cuda is True else -1
    if opt.device == 0:
        assert torch.cuda.is_available()

    read_data(opt)  # checks that the data files exist
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)
        # print("opt.train_len", opt.train_len)

    if opt.checkpoint > 0 or opt.checktime > 0:
        print("model weights will be saved every %d minutes and every %d epochs to directory %s "
              % (opt.checktime, opt.checkpoint, config.weights))

    train_model(model, opt)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()

    # this fork forces CPU mode by handing read_data a minimal namespace
    args = argparse.Namespace()
    args.is_cuda = False
    # opt.device = 0 if opt.no_cuda is False else -1
    # if opt.device == 0:
    #     assert torch.cuda.is_available()

    read_data(args)
    ss('-in main')
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    # -floyd is store_true, so it is never None; the effective condition is just load_weights
    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)  # os.mkdir would crash if the folder exists
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-mode', default='train')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2000)
    parser.add_argument('-d_model', type=int, default=500)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=10)
    parser.add_argument('-dropout', type=float, default=0.2)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-batch_size', type=int, default=32)
    parser.add_argument('-vid_feat_size', type=int, default=500)
    parser.add_argument('-save_freq', type=int, default=2)
    parser.add_argument('-model_save_dir', default='model')
    parser.add_argument('-log_frequency', type=int, default=20)
    # DataLoader
    parser.add_argument('-num_train_set', type=int, default=8000)
    parser.add_argument('-video_features_file', default='activitynet/anet_v1.3.c3d.hdf5')
    parser.add_argument('-video_descriptions_file', default='activitynet_descriptions.pkl')
    parser.add_argument('-vocab_file', default='activitynet_vocab.pkl')
    parser.add_argument('-video_descriptions_csv', default='data/video_description.csv')
    parser.add_argument('-target_feature_size', type=int, default=14238)

    opt = parser.parse_args()
    opt.device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

    model = get_model(opt, opt.vid_feat_size, opt.target_feature_size)
    model = nn.DataParallel(model)

    if opt.mode == 'train':
        print("Training model for num_epochs - {}, vocab_size - {}...".format(opt.epochs, opt.target_feature_size))
        opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                         betas=(0.9, 0.98), eps=1e-9)
        if opt.SGDR:
            opt.sched = CosineWithRestarts(opt.optimizer, T_max=10)
        model.train()
        trainloader = DataLoader(opt=opt, train=True)
        evalloader = DataLoader(opt=opt, train=False)
        train_model(model, trainloader, evalloader, opt)
    elif opt.mode == 'eval':
        print("Evaluating model...")
        model.load_state_dict(torch.load(opt.model_save_dir + '/model_595.pth'))
        model.eval()
        print("Transformer model loaded")
        evalloader = DataLoader(opt=opt, train=False)
        eval_model(model, evalloader, opt)
    else:
        print("Wrong option. Give either 'train' or 'eval' as input to -mode")
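# Note on the eval branch above: the checkpoint is loaded *after* the model is
# wrapped in nn.DataParallel, so its state_dict keys must carry the 'module.'
# prefix (i.e. model_595.pth must have been saved from the wrapped model). A
# small hedged helper in case a bare, unwrapped checkpoint ever needs to load
# instead; the name strip_module_prefix is ours, not the repo's:
def strip_module_prefix(state_dict):
    """Remove DataParallel's 'module.' key prefix from a state dict."""
    return {key[len('module.'):] if key.startswith('module.') else key: value
            for key, value in state_dict.items()}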
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=10)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batch_size', type=int, default=512)
    parser.add_argument('-print_every', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-patience', type=int, default=3)
    # note: argparse's type=bool treats any non-empty string as True;
    # action='store_true' would be the more robust choice
    parser.add_argument('-retrain', type=bool, default=False)

    opt = parser.parse_args()
    opt.device = 'cuda' if opt.no_cuda is False else 'cpu'
    if opt.device == 'cuda':
        assert torch.cuda.is_available()

    if opt.retrain:
        print('load checkpoint ...')
        checkpoint = torch.load('models/checkpoint.chkpt',
                                map_location=torch.device(opt.device))
        opt = checkpoint['settings']
    else:
        checkpoint = None

    # pickle.load/dump take file objects, not paths
    data = pickle.load(open('data/m30k_deen_shr.pkl', 'rb'))
    vocab_src = data['vocab']['src']
    vocab_trg = data['vocab']['trg']
    vocab = {'src': vocab_src, 'trg': vocab_trg}
    utils.mkdir('models')
    pickle.dump(vocab, open('models/vocab.pkl', 'wb'))

    opt.src_pad = vocab_src.pad_idx
    opt.trg_pad = vocab_trg.pad_idx
    opt.max_src_len = data['max_len']['src']
    opt.max_trg_len = data['max_len']['trg']

    train_data_loader, valid_data_loader, test_data_loader = prepare_dataloaders(opt, data)
    model = init_model(opt, vocab_src.vocab_size, vocab_trg.vocab_size, checkpoint=checkpoint)

    optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                 betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(optimizer, T_max=len(train_data_loader))

    train(model, optimizer, train_data_loader, valid_data_loader, opt)
    test(model, test_data_loader, opt)
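# The -retrain branch above expects 'models/checkpoint.chkpt' to be a dict
# holding at least a 'settings' key with the argparse namespace. A minimal
# save-side sketch consistent with that loader; the 'model' key (presumably
# what init_model restores from) and the helper name are assumptions:
def save_checkpoint(model, opt, path='models/checkpoint.chkpt'):
    checkpoint = {
        'settings': opt,              # read back via checkpoint['settings'] on -retrain
        'model': model.state_dict(),  # assumed key for the weights
    }
    torch.save(checkpoint, path)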
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, required=True)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, required=True)
    parser.add_argument('-restart', type=int, required=True)
    parser.add_argument('-bestval', type=float, required=True)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, required=True)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-aaa', type=float, required=True)  # learning-rate multiplier: lr = aaa * 1e-8
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-src_datav', required=True)
    parser.add_argument('-trg_datav', required=True)

    opt = parser.parse_args()

    print(torch.cuda.is_available())
    print(torch.cuda.device_count())
    print(torch.cuda.get_device_name(0))
    print(torch.cuda.current_device())

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    opt.train1 = create_dataset1(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    aaa = opt.aaa
    opt.optimizer = torch.optim.Adam(model.parameters(), lr=aaa * 1e-8,
                                     betas=(0.9, 0.999), eps=1e-8)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    train_model(model, opt, SRC, TRG)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)

    opt = parser.parse_args()
    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    train_model(model, opt)
    promptNextAction(model, opt, SRC, TRG)
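# Example invocation for the variant above (flag names come straight from its
# argparse setup; the script name and the data/language values are placeholders):
#
#   python train.py -src_data data/english.txt -trg_data data/french.txt \
#       -src_lang en -trg_lang fr -epochs 10 -batchsize 1500 -SGDR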
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()
    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)

    # # convert translation dictionary to tokens dictionary
    # translation_dictionar = pickle.load(open('data/translation_dictionary.p', 'rb'))
    # new_dict = {}
    # for en_word, fr_word in translation_dictionar.items():
    #     new_dict[SRC.vocab.stoi[en_word]] = TRG.vocab.stoi[fr_word.lower()]
    # pickle.dump(new_dict, open('data/tokenized_translation_dictionary.p', 'wb'))

    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        pickle.dump(SRC.vocab, open('SRC_vocab.p', 'wb'))  # saves torchtext Vocab object
        pickle.dump(TRG.vocab, open('TRG_vocab.p', 'wb'))  # saves torchtext Vocab object

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    # -floyd is store_true, so it is never None; the effective condition is just load_weights
    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)  # os.mkdir would crash if the folder exists
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
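# The -savetokens branch above pickles the raw torchtext Vocab objects. A
# matching load-side sketch for inference; the file names mirror the dump
# calls above, and stoi/itos are standard torchtext Vocab attributes:
import pickle


def load_vocabs(src_path='SRC_vocab.p', trg_path='TRG_vocab.p'):
    src_vocab = pickle.load(open(src_path, 'rb'))
    trg_vocab = pickle.load(open(trg_path, 'rb'))
    return src_vocab, trg_vocab

# usage sketch, given a tokenized source sentence:
#   ids = [src_vocab.stoi[tok] for tok in tokens]     # words -> token ids
#   words = [trg_vocab.itos[i] for i in output_ids]   # target ids -> words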
def main():
    # TODO (future): use the learning-rate decay schedule from the
    # "Attention Is All You Need" paper, previewed below.
    # step_list = [i*500 for i in range(2000)]
    # for step in step_list:
    #     lrate = (1/np.sqrt(512)) * min(1/np.sqrt(step), step*4000**-1.5)
    #     print(f'{step}: lrate {lrate}')

    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', required=True)
    parser.add_argument('-src_val_data', required=False, default='data/port_dev.txt')
    parser.add_argument('-trg_data', required=True)
    parser.add_argument('-trg_val_data', required=False, default='data/eng_dev.txt')
    parser.add_argument('-src_lang', required=True)
    parser.add_argument('-trg_lang', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)  # hidden size for models using RNN
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.00015)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=100)  # max number of spaces per sentence
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-decoder_extra_layers', type=int, default=0)
    parser.add_argument('-nmt_model_type', type=str, default='transformer')
    parser.add_argument('-word_embedding_type', type=str, default=None)
    parser.add_argument('-use_dynamic_batch', action='store_true')

    opt = parser.parse_args()
    print(opt)

    # class InputArgs():
    #     def __init__(self):
    #         self.src_data = 'data/port_train.txt'
    #         self.src_val_data = 'data/port_dev.txt'
    #         self.trg_data = 'data/eng_train.txt'
    #         self.trg_val_data = 'data/eng_dev.txt'
    #         self.src_lang = 'pt'
    #         self.trg_lang = 'en'
    #         self.no_cuda = True
    #         self.SGDR = False
    #         self.epochs = 5
    #         self.d_model = 300
    #         self.n_layers = 6
    #         self.heads = 6
    #         self.dropout = 0.1
    #         self.batchsize = 1024
    #         self.printevery = 100
    #         self.lr = 0.00015
    #         self.load_weights = None
    #         self.create_valset = False
    #         self.max_strlen = 100
    #         self.checkpoint = 1
    #         self.decoder_extra_layers = 0
    #         self.nmt_model_type = 'rnn_naive_model'  # 'transformer', 'rnn_naive_model', 'align_and_translate' ...
    #         self.word_embedding_type = None  # None, 'glove' or 'fast_text'
    #         self.use_dynamic_batch = None
    # opt = InputArgs()
    # print(opt.__dict__)

    # opt.device = 0 if opt.no_cuda is False else torch.device("cpu")
    if opt.no_cuda is False:
        assert torch.cuda.is_available()
        opt.device = torch.device("cuda")
    else:
        opt.device = torch.device("cpu")

    i_t = time.time()
    if opt.word_embedding_type in ['glove', 'fast_text']:
        if opt.word_embedding_type == 'glove':
            word_emb = KeyedVectors.load_word2vec_format('word_embeddings/glove_s300.txt')
        elif opt.word_embedding_type == 'fast_text':
            word_emb = KeyedVectors.load_word2vec_format('word_embeddings/ftext_skip_s300.txt')
        now = time.time()
        minutes = math.floor((now - i_t) / 60)
        print(f'\nWord embedding of type {str(opt.word_embedding_type)} took '
              f'{minutes} minutes and {now - i_t - minutes*60:.2f} seconds to load.\n')
    elif opt.word_embedding_type is None:
        word_emb = None

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.SRC = SRC
    opt.TRG = TRG  # important, these are used to input embeddings
    opt.train, opt.valid, SRC, TRG = create_dataset(opt, SRC, TRG, word_emb)
    opt.word_emb = word_emb  # just for querying vocabulary
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), word_emb)

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)  # os.mkdir would crash if the folder exists
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    train_model(model, opt)
    promptNextAction(model, opt, SRC, TRG)
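# The commented-out block at the top of the variant above previews the
# schedule from the attention paper: lrate = d_model**-0.5 * min(step**-0.5,
# step * warmup**-1.5). A runnable version as a torch LambdaLR sketch; the
# wiring (base lr of 1.0 so the lambda sets the absolute rate) is an
# assumption, while the formula just restates the comment with d_model=512
# and warmup=4000:
def noam_lambda(d_model=512, warmup=4000):
    def lrate(step):
        step = max(step, 1)  # avoid 0 ** -0.5 on the very first call
        return d_model ** -0.5 * min(step ** -0.5, step * warmup ** -1.5)
    return lrate

# usage sketch:
#   optimizer = torch.optim.Adam(model.parameters(), lr=1.0, betas=(0.9, 0.98), eps=1e-9)
#   sched = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=noam_lambda())
#   ...then call sched.step() once per training step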
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-data_path', required=True)
    parser.add_argument('-output_dir', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-val_check_every_n', type=int, default=3)
    parser.add_argument('-calculate_val_loss', action='store_true')
    parser.add_argument('-val_forward_pass', action='store_true')
    parser.add_argument('-tensorboard_graph', action='store_true')
    parser.add_argument('-alex', action='store_true')
    parser.add_argument('-compositional_eval', action='store_true')
    parser.add_argument('-char_tokenization', action='store_true')
    parser.add_argument('-n_val', type=int, default=1000)
    parser.add_argument('-n_test', type=int, default=1000)
    parser.add_argument('-do_test', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=3000)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=512)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()
    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
    if opt.alex:
        torch.cuda.set_device(1)

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train, opt.val = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), SRC)

    if opt.tensorboard_graph:
        # trace one batch through the model so TensorBoard can draw the graph
        writer = SummaryWriter('runs')
        for i, batch in enumerate(opt.train):
            src = batch.src.transpose(0, 1).cuda()
            trg = batch.trg.transpose(0, 1).cuda()
            trg_input = trg[:, :-1]
            src_mask, trg_mask = create_masks(src, trg_input, opt)
            writer.add_graph(model, (src, trg_input, src_mask, trg_mask))
            break
        writer.close()

    # beam search parameters
    opt.k = 1
    opt.max_len = opt.max_strlen

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    opt.scheduler = ReduceLROnPlateau(opt.optimizer, factor=0.5, patience=5, verbose=True)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    train_model(model, opt, SRC, TRG)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
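# The tensorboard-graph block above calls create_masks(src, trg_input, opt),
# which is defined elsewhere. A minimal sketch of the standard Transformer
# masking it presumably performs; opt.src_pad / opt.trg_pad as the padding
# indices is an assumption (those attributes do appear in the checkpoint
# variant earlier in this file), while the lower-triangular "no peeking"
# mask is standard:
import torch


def create_masks(src, trg_input, opt):
    # hide padding positions in the source: (batch, 1, src_len)
    src_mask = (src != opt.src_pad).unsqueeze(-2)
    # hide padding positions in the target: (batch, 1, trg_len)
    trg_mask = (trg_input != opt.trg_pad).unsqueeze(-2)
    # additionally hide future positions: (trg_len, trg_len), broadcast over batch
    size = trg_input.size(1)
    nopeak = torch.tril(torch.ones(size, size, dtype=torch.bool,
                                   device=trg_input.device))
    return src_mask, trg_mask & nopeak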
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-src_data', default='data/english.txt')
    parser.add_argument('-trg_data', default='data/french.txt')
    parser.add_argument('-src_lang', default='en_core_web_sm')
    parser.add_argument('-trg_lang', default='fr_core_news_sm')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=10)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=80)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-output_dir', default='output')

    opt = parser.parse_args()
    print(opt)

    opt.device = "cpu" if opt.no_cuda else "cuda"
    if opt.device == "cuda":
        assert torch.cuda.is_available()

    read_data(opt)
    SRC, TRG = create_fields(opt)

    if not os.path.isdir(opt.output_dir):
        os.makedirs(opt.output_dir)

    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab))
    if opt.device == "cuda":
        model.cuda()

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    # -floyd is store_true, so it is never None; the effective condition is just load_weights
    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)  # os.mkdir would crash if the folder exists
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    print("saving field pickles to " + opt.output_dir + "/...")
    pickle.dump(SRC, open(f'{opt.output_dir}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{opt.output_dir}/TRG.pkl', 'wb'))
    print("field pickles saved!")

    train_model(model, opt)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-data_path', required=True)
    parser.add_argument('-output_dir', required=True)
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-val_check_every_n', type=int, default=3)
    parser.add_argument('-calculate_val_loss', action='store_true')
    parser.add_argument('-val_forward_pass', action='store_true')
    parser.add_argument('-tensorboard_graph', action='store_true')
    parser.add_argument('-alex', action='store_true')
    parser.add_argument('-compositional_eval', action='store_true')
    parser.add_argument('-wandb', action='store_true')
    parser.add_argument('-n_val', type=int, default=1000)
    parser.add_argument('-n_test', type=int, default=1000)
    parser.add_argument('-do_test', action='store_true')
    parser.add_argument('-epochs', type=int, default=50)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=6)
    parser.add_argument('-heads', type=int, default=8)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-mask_prob', type=float, default=0.5)
    parser.add_argument('-alpha', type=float, default=2)
    parser.add_argument('-batchsize', type=int, default=3000)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-log_interval', type=int, default=1000)
    parser.add_argument('-lr', type=float, default=0.0001)
    parser.add_argument('-load_weights')
    parser.add_argument('-load_r_to_o')
    parser.add_argument('-label_path')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-task', type=str, choices=["toy_task", "e_snli_r", "e_snli_o", "cos_e"], default="toy_task")
    parser.add_argument('-max_strlen', type=int, default=512)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)

    opt = parser.parse_args()
    wandb_tags = [opt.task]

    opt.device = 0 if opt.no_cuda is False else -1
    if opt.device == 0:
        assert torch.cuda.is_available()
    if opt.alex:
        torch.cuda.set_device(1)

    read_data(opt)

    if opt.task == 'e_snli_r':
        # reuse the vocabularies of a previously trained rationale-to-output classifier
        assert opt.label_path is not None
        opt.classifier_SRC, opt.classifier_TRG = create_label_fields(opt)
        with open(opt.load_r_to_o + '/SRC.pkl', 'rb') as f:
            old_SRC = pickle.load(f)
        with open(opt.load_r_to_o + '/TRG.pkl', 'rb') as f:
            old_TRG = pickle.load(f)
        opt.classifier_SRC.vocab = old_SRC.vocab
        opt.classifier_TRG.vocab = old_TRG.vocab

    SRC, TRG = create_fields(opt)
    opt.train, opt.val = create_dataset(opt, SRC, TRG)

    if opt.task == 'e_snli_o':
        model = get_classifier_model(opt, len(SRC.vocab), len(TRG.vocab))
    else:
        if opt.task == 'e_snli_r':
            opt.classifier = load_r_to_o(opt, len(opt.classifier_SRC.vocab), len(opt.classifier_TRG.vocab))
        model = get_model(opt, len(SRC.vocab), len(TRG.vocab), SRC)

    if opt.wandb:
        config = wandb.config
        config.learning_rate = opt.lr
        config.max_pred_length = opt.max_strlen
        config.mask_prob = opt.mask_prob
        config.batch_size = opt.batchsize
        config.log_interval = opt.log_interval
        group_name = 'masking_probability_p=' + str(opt.mask_prob) + '_alpha=' + str(opt.alpha)
        wandb.init(config=config, project='toy-task', entity='c-col',
                   group=group_name, tags=wandb_tags)
        wandb.watch(model)

    if opt.tensorboard_graph:
        # trace one batch through the model so TensorBoard can draw the graph
        writer = SummaryWriter('runs')
        for i, batch in enumerate(opt.train):
            src = batch.src.transpose(0, 1).cuda()
            trg = batch.trg.transpose(0, 1).cuda()
            trg_input = trg[:, :-1]
            src_mask, trg_mask = create_masks(src, trg_input, opt)
            writer.add_graph(model, (src, trg_input, src_mask, trg_mask))
            break
        writer.close()

    # beam search parameters
    opt.k = 1
    opt.max_len = opt.max_strlen

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    opt.scheduler = ReduceLROnPlateau(opt.optimizer, factor=0.5, patience=5, verbose=True)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    train_model(model, opt, SRC, TRG)

    if opt.floyd is False:
        promptNextAction(model, opt, SRC, TRG)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-fold', default=0)
    parser.add_argument('-src_lang', default='en')
    parser.add_argument('-trg_lang', default='en')
    parser.add_argument('-no_cuda', action='store_true')
    parser.add_argument('-SGDR', action='store_true')
    parser.add_argument('-epochs', type=int, default=2)
    parser.add_argument('-d_model', type=int, default=512)
    parser.add_argument('-n_layers', type=int, default=1)
    parser.add_argument('-heads', type=int, default=1)
    parser.add_argument('-dropout', type=float, default=0.1)
    parser.add_argument('-batchsize', type=int, default=1500)
    parser.add_argument('-printevery', type=int, default=100)
    parser.add_argument('-lr', type=float, default=0.001)
    parser.add_argument('-load_weights')
    parser.add_argument('-create_valset', action='store_true')
    parser.add_argument('-max_strlen', type=int, default=192)
    parser.add_argument('-floyd', action='store_true')
    parser.add_argument('-checkpoint', type=int, default=0)
    parser.add_argument('-savetokens', type=int, default=0)

    opt = parser.parse_args()
    opt.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print(opt.device)

    read_data(opt)
    SRC, TRG = create_fields(opt)
    opt.train = create_dataset(opt, SRC, TRG)
    model = get_model(opt, len(SRC.vocab), len(TRG.vocab), model_type='train')
    model = model.to(device=opt.device)

    if opt.savetokens == 1:
        pickle.dump(SRC.vocab, open('SRC_vocab.p', 'wb'))  # saves torchtext Vocab object
        pickle.dump(TRG.vocab, open('TRG_vocab.p', 'wb'))  # saves torchtext Vocab object

    opt.optimizer = torch.optim.Adam(model.parameters(), lr=opt.lr,
                                     betas=(0.9, 0.98), eps=1e-9)
    if opt.SGDR:
        opt.sched = CosineWithRestarts(opt.optimizer, T_max=opt.train_len)

    if opt.checkpoint > 0:
        print("model weights will be saved every %d minutes and at end of epoch to directory weights/"
              % (opt.checkpoint))

    # -floyd is store_true, so it is never None; the effective condition is just load_weights
    if opt.load_weights is not None:
        os.makedirs('weights', exist_ok=True)  # os.mkdir would crash if the folder exists
        pickle.dump(SRC, open('weights/SRC.pkl', 'wb'))
        pickle.dump(TRG, open('weights/TRG.pkl', 'wb'))

    # train model
    train_model(model, opt)

    # save weights
    dst = '../gdrive/My Drive/tweet-sentiment-extraction'
    print("saving weights to " + dst + "/...")
    torch.save(model.state_dict(), f'{dst}/model_weights')
    pickle.dump(SRC, open(f'{dst}/SRC.pkl', 'wb'))
    pickle.dump(TRG, open(f'{dst}/TRG.pkl', 'wb'))
    saved_once = 1
    print("weights and field pickles saved to " + dst)
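# The variant above saves the raw state_dict to Google Drive. Loading it back
# for inference is the mirror image; map_location is added so CPU-only
# machines can read GPU-trained weights, and the get_model call simply repeats
# the constructor used above:
#
#   dst = '../gdrive/My Drive/tweet-sentiment-extraction'
#   model = get_model(opt, len(SRC.vocab), len(TRG.vocab), model_type='train')
#   model.load_state_dict(torch.load(f'{dst}/model_weights',
#                                    map_location=torch.device('cpu')))
#   model.eval()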