def train(train_model, prediction_model, model_name='baseline', epochs=2, batch_size=32, validation_split=0.1):
    # load training data
    qa_data = QAData()
    questions, good_answers, bad_answers = qa_data.get_training_data()
    logger.info(f'Training: epochs {epochs}, batch_size {batch_size}, validation_split {validation_split}')

    # train the model
    Y = np.zeros(shape=(questions.shape[0],))
    early_stopping = EarlyStopping(monitor='val_loss', mode='min', min_delta=0.01,
                                   verbose=1, patience=50, restore_best_weights=True)
    hist = train_model.fit([questions, good_answers, bad_answers], Y,
                           epochs=epochs, batch_size=batch_size,
                           validation_split=validation_split, verbose=1,
                           callbacks=[early_stopping])

    # save a plot of loss and val_loss over the epochs
    df = pd.DataFrame(hist.history)
    df.insert(0, 'epochs', range(len(df)))
    df = pd.melt(df, id_vars=['epochs'])
    plot = ggplot(aes(x='epochs', y='value', color='variable'), data=df) + geom_line()
    filename = f'{model_name}_plot.png'
    logger.info(f'saving loss, val_loss plot: {filename}')
    plot.save(filename)

    # save_model_architecture(prediction_model, model_name=model_name)
    save_model_weights(train_model, model_name=model_name)
    clear_session()
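# Usage sketch for train() above, with hedged assumptions: get_baseline_model()
# is assumed to return a (train_model, prediction_model) pair, as it does in
# main() further below, and the hyperparameter values here are illustrative,
# not defaults taken from the repo.
if __name__ == '__main__':
    baseline_train_model, baseline_prediction_model = get_baseline_model()
    train(baseline_train_model, baseline_prediction_model,
          model_name='baseline', epochs=50, batch_size=64, validation_split=0.1)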
def run(args, logger):
    tokenizer = AutoTokenizer.from_pretrained(args.model_name)

    train_data = QAData(logger, args, args.train_file, True)
    dev_data = QAData(logger, args, args.predict_file, False)
    train_data.load_dataset(tokenizer)
    train_data.load_dataloader()
    dev_data.load_dataset(tokenizer)
    dev_data.load_dataloader()

    def convert_to_single_gpu(state_dict):
        # strip the 'module.' prefix that DataParallel adds to parameter names
        def _convert(key):
            if key.startswith('module.'):
                return key[7:]
            return key
        return {_convert(key): value for key, value in state_dict.items()}

    if args.do_train:
        if args.checkpoint is not None:
            model = AutoModelForSeq2SeqLM.from_pretrained(
                args.model_name,
                state_dict=convert_to_single_gpu(torch.load(args.checkpoint)))
        else:
            model = AutoModelForSeq2SeqLM.from_pretrained(args.model_name)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        if torch.cuda.is_available():
            model.to(torch.device("cuda"))

        # exclude biases and LayerNorm weights from weight decay
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in model.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': args.weight_decay},
            {'params': [p for n, p in model.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=100000)
        train(args, logger, model, train_data, dev_data, optimizer, scheduler)

    if args.do_predict:
        checkpoint = os.path.join(args.output_dir, 'best-model.pt')
        model = AutoModelForSeq2SeqLM.from_pretrained(
            args.model_name,
            state_dict=convert_to_single_gpu(torch.load(checkpoint)))
        logger.info("Loading checkpoint from {}".format(checkpoint))
        if torch.cuda.is_available():
            model.to(torch.device("cuda"))
        model.eval()
        ems = inference(model, dev_data, save_predictions=True)
        logger.info("%s on %s data: %.2f" % (dev_data.metric, dev_data.data_type, np.mean(ems) * 100))
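# Invocation sketch for run() above. The function only reads attributes off
# `args`, so a SimpleNamespace carrying the fields referenced in its body is
# enough for a smoke test; every value below is an illustrative assumption,
# not a documented default.
import logging
from types import SimpleNamespace

smoke_args = SimpleNamespace(
    model_name='t5-small',          # any checkpoint AutoModelForSeq2SeqLM accepts
    train_file='data/train.json',   # hypothetical paths
    predict_file='data/dev.json',
    output_dir='out',
    checkpoint=None,
    do_train=True,
    do_predict=True,
    n_gpu=1,
    weight_decay=0.01,
    learning_rate=3e-5,
    adam_epsilon=1e-8,
    warmup_steps=500,
)
run(smoke_args, logging.getLogger(__name__))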
def main(mode='test', question=None, answers=None):
    """
    This function is used to train, predict or test

    Args:
        mode (str): train/predict/test
        question (str): this contains the question
        answers (list): this contains list of answers in string format

    Returns:
        index (int): index of the most likely answer
    """
    # get the train and predict models
    vocabulary = Vocabulary("./data/vocab_all.txt")
    embedding_file = "./data/word2vec_100_dim.embeddings"
    qa_model = QAModel()
    train_model, predict_model = qa_model.get_bilstm_model(embedding_file, len(vocabulary))
    epoch = 1

    if mode == 'train':
        for i in range(epoch):
            print('Training epoch', i)
            # load training data
            qa_data = QAData()
            questions, good_answers, bad_answers = qa_data.get_training_data()

            # train the model
            Y = np.zeros(shape=(questions.shape[0],))
            train_model.fit([questions, good_answers, bad_answers], Y,
                            epochs=1, batch_size=64, validation_split=0.1, verbose=1)

        # save the trained model
        train_model.save_weights('model/train_weights_epoch_' + str(epoch) + '.h5', overwrite=True)
        predict_model.save_weights('model/predict_weights_epoch_' + str(epoch) + '.h5', overwrite=True)

    elif mode == 'predict':
        # load the evaluation data
        data = pickle.load(open("./data/dev.pkl", 'rb'))
        random.shuffle(data)

        # load weights from the trained model
        qa_data = QAData()
        predict_model.load_weights('model/lstm_predict_weights_epoch_1.h5')

        c = 0   # examples where the top-ranked answer is a good one
        c1 = 0  # accumulated reciprocal rank
        for i, d in enumerate(data):
            print(i, len(data))
            # pad the data and get it in the desired format
            indices, answers, question = qa_data.process_data(d)

            # get the similarity scores
            sims = predict_model.predict([question, answers])

            n_good = len(d['good'])
            max_r = np.argmax(sims)           # best answer overall
            max_n = np.argmax(sims[:n_good])  # best among the good answers
            r = rankdata(sims, method='max')
            c += 1 if max_r == max_n else 0
            c1 += 1 / float(r[max_r] - r[max_n] + 1)

        precision = c / float(len(data))
        mrr = c1 / float(len(data))
        print("Precision", precision)
        print("MRR", mrr)

    elif mode == 'test':
        # question and answers come from params
        qa_data = QAData()
        answers, question = qa_data.process_test_data(question, answers)

        # load weights from the trained model
        predict_model.load_weights('model/lstm_predict_weights_epoch_1.h5')

        # get the similarity scores
        sims = predict_model.predict([question, answers])
        max_r = np.argmax(sims)
        return max_r
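# Worked example of the ranking metrics used in 'predict' mode (toy numbers,
# illustration only). rankdata(..., method='max') assigns rank 1 to the lowest
# score, so r[max_r] is always len(sims); r[max_r] - r[max_n] + 1 is therefore
# the rank, counted from the top, of the best good answer, and its reciprocal
# is that example's MRR contribution.
import numpy as np
from scipy.stats import rankdata

sims = np.array([0.2, 0.9, 0.5, 0.7])     # first two entries are 'good' answers
n_good = 2
max_r = np.argmax(sims)                   # 1 -> a good answer ranks first overall
max_n = np.argmax(sims[:n_good])          # 1
r = rankdata(sims, method='max')          # [1, 4, 2, 3]
print(r[max_r] - r[max_n] + 1)            # 1 -> reciprocal rank 1.0, counted as correct

sims = np.array([0.2, 0.6, 0.5, 0.7])     # now a 'bad' answer wins
max_r, max_n = np.argmax(sims), np.argmax(sims[:n_good])  # 3, 1
r = rankdata(sims, method='max')          # [1, 3, 2, 4]
print(r[max_r] - r[max_n] + 1)            # 2 -> MRR contribution 1/2, not correct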
from model import QAModel
from data import QAData, Vocabulary

qa_data = QAData()

# Data formats (token/answer indices into the vocabulary):
# training_set: [{'question': [96, 3968, 21507, 13287, 16531, 4502], 'answers': [15916]}]
# answers: {24981: [15927, 9500, 140, 17589, 21345, 1628, 5560, 16978, 22126, 11413, 8813,
#                   7914, 12683, 16978, 1175, 5790, 21960, 13395, 10752, 12683, 7624, 16152,
#                   16634, 13260, 1628, 7624, 7644, 5460, 7624, 15575, 13085, 19711, 7624,
#                   3520, 19588, 15927, 16152, 21507, 7624, 16217, 5460, 15927, 11503, 7624,
#                   21960, 1919, 6751, 13395, 7624, 10752, 9236, 15927, 9476, 2203, 11147,
#                   21507, 1561, 16152, 8334, 15927, 6900, 3400, 21711]}
# dev: [{'bad': [500 indices of answers], 'good': [17628], 'question': [8668, 21507, 19625, 22159, 5460, 3205]}]
# Other intermediate tensors that could be inspected instead:
# predict_model.layers[2].layers[6].output, predict_model.layers[2].layers[7].get_output_at(0),
# predict_model.layers[2].layers[7].get_output_at(1), predict_model.layers[2].layers[8].get_output_at(0),
# predict_model.layers[2].layers[8].get_output_at(1), predict_model.layers[2].layers[9].get_output_at(0),
# predict_model.layers[2].layers[9].get_output_at(1), predict_model.layers[2].layers[10].get_output_at(0),
# predict_model.layers[2].layers[10].get_output_at(1), predict_model.layers[2].layers[11].output,
# predict_model.layers[2].layers[13].get_output_at(0), predict_model.layers[2].layers[13].get_output_at(1),
# predict_model.layers[2].layers[14].output

# build a model that exposes the chosen intermediate activation
layer_outputs = [predict_model.layers[2].layers[13].get_output_at(0)]
act_model = Model(inputs=predict_model.inputs, outputs=layer_outputs)

data = pickle.load(open("./data/dev.pkl", 'rb'))
print("Total data length", len(data))
qa_data = QAData()
random.shuffle(data)

for i, d in enumerate(data):
    print("i", i)
    indices, answers, question = qa_data.process_data(d)
    # print("indices", indices)
    # print("answers", answers.shape)
    # print("question", question.shape)
    print("Input:", d)

    sims = predict_model.predict([question, answers])
    # print("sims", sims)
    n_good = len(d['good'])
    max_r = np.argmax(sims)
    max_n = np.argmax(sims[:n_good])
    if max_r == max_n:
        print("Correct")
def run(args, logger):
    tokenizer = BartTokenizer.from_pretrained("facebook/bart-large")  #TJH: bart-large

    if args.is_unifiedqa:
        dev_data = UnifiedQAData(logger, args, args.predict_file, False)
    else:
        dev_data = QAData(logger, args, args.predict_file, False)
    if not args.skip_inference:
        dev_data.load_dataset(tokenizer)
        dev_data.load_dataloader()

    if args.do_train:
        if args.is_unifiedqa:
            train_data = UnifiedQAData(logger, args, args.train_file, True)
        else:
            train_data = QAData(logger, args, args.train_file, True)
        train_data.load_dataset(tokenizer)
        train_data.load_dataloader()

        if args.checkpoint is not None:
            model = MyBart.from_pretrained("facebook/bart-large",
                                           state_dict=torch.load(args.checkpoint))  #TJH: bart-large
            logger.info("Loading checkpoint from {}".format(args.checkpoint))  #TJH Added
        else:
            model = MyBart.from_pretrained("facebook/bart-large")  #TJH: bart-large
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model)
        if args.n_gpu > 0:
            model.to(torch.device("cuda"))

        # exclude biases and LayerNorm weights from weight decay
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in model.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': args.weight_decay},
            {'params': [p for n, p in model.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=100000)
        train(args, logger, model, train_data, dev_data, optimizer, scheduler)

    if args.do_predict:
        checkpoint = os.path.join(args.output_dir, 'best-model.pt') if args.checkpoint is None else args.checkpoint
        model = MyBart.from_pretrained("facebook/bart-large",
                                       state_dict=torch.load(checkpoint))  #TJH: bart-large
        logger.info("Loading checkpoint from {}".format(checkpoint))
        if args.n_gpu > 0:
            model.to(torch.device("cuda"))
        model.eval()
        ems = inference(model, dev_data, save_predictions=True)
        logger.info("%s on %s data: %.2f" % (dev_data.metric, dev_data.data_type, np.mean(ems) * 100))
def main(mode='test'):
    # get the train and predict models
    vocabulary = Vocabulary("./data/vocab_all.txt")
    embedding_file = "./data/word2vec_100_dim.embeddings"
    qa_model = QAModel()
    train_model, predict_model = qa_model.get_lstm_cnn_model(embedding_file, len(vocabulary))
    epo = 100

    if mode == 'train':
        # load training data
        qa_data = QAData()
        questions, good_answers, bad_answers = qa_data.get_training_data()

        # stop early when val_loss stalls and keep the best model on disk
        callbacks = [EarlyStopping(monitor='val_loss', patience=20),
                     ModelCheckpoint(filepath='best_model.h5', monitor='val_loss', save_best_only=True)]

        # train the model
        Y = np.zeros(shape=(questions.shape[0],))
        train_model.fit([questions, good_answers, bad_answers], Y,
                        epochs=epo, batch_size=64, validation_split=0.1,
                        verbose=1, callbacks=callbacks)

        # save the trained model
        # train_model.save_weights('model/train_weights_epoch_' + str(epo) + '.h5', overwrite=True)
        model = keras.models.load_model('best_model.h5')
        model.save_weights('model/best_weights_epoch_' + str(epo) + '.h5', overwrite=True)
        predict_model.save_weights('model/predict_weights_epoch_' + str(epo) + '.h5', overwrite=True)

    elif mode == 'predict':
        # load the evaluation data
        data = pickle.load(open("./data/dev.pkl", 'rb'))
        random.shuffle(data)

        # load weights from the trained models and evaluate each
        qa_data = QAData()
        model_filenames = ['model/best_model.h5',
                           'model/predict_weights_epoch_' + str(epo) + '.h5']
        for model_name in model_filenames:
            predict_model.load_weights(model_name)
            c = 0
            c1 = 0
            for i, d in enumerate(data):
                if i % 100 == 0:
                    print(i, len(data))
                # pad the data and get it in the desired format
                indices, answers, question = qa_data.process_data(d)

                # get the similarity scores
                sims = predict_model.predict([question, answers])

                n_good = len(d['good'])
                max_r = np.argmax(sims)
                max_n = np.argmax(sims[:n_good])
                r = rankdata(sims, method='max')
                c += 1 if max_r == max_n else 0
                c1 += 1 / float(r[max_r] - r[max_n] + 1)

            precision = c / float(len(data))
            mrr = c1 / float(len(data))
            print(f'Results for model: {model_name}')
            print("Precision", precision)
            print("MRR", mrr)
def main(mode='train', question=None, answers=None, epochs=2, batch_size=32, validation_split=0.1, model_name='baseline'):
    """
    This function is used to train, predict or test

    Args:
        mode (str): train/predict/test
        question (str): this contains the question
        answers (list): this contains list of answers in string format

    Returns:
        index (int): index of the most likely answer
    """
    if mode == 'train':
        # baseline model
        train_model, prediction_model = get_baseline_model()
        train_model.summary()
        train(train_model, prediction_model,
              epochs=epochs, batch_size=batch_size, validation_split=validation_split)

        # small model
        small_train_model, small_prediction_model = get_small_model()
        small_train_model.summary()
        train(small_train_model, small_prediction_model, model_name='small',
              epochs=epochs, batch_size=batch_size, validation_split=validation_split)

        # larger model
        larger_train_model, larger_prediction_model = get_larger_model()
        larger_train_model.summary()
        train(larger_train_model, larger_prediction_model, model_name='larger',
              epochs=epochs, batch_size=batch_size, validation_split=validation_split)

    elif mode == 'predict':
        # load the evaluation data
        with open('data/test.json') as read_file:
            data = json.load(read_file)
        random.shuffle(data)
        qa_data = QAData()

        # rebuild the prediction model matching the saved weights
        # (restoring from the saved JSON architecture is kept for reference)
        # with open(f'model/model_architecture_{model_name}.json', 'r') as read_file:
        #     json_string = read_file.read()
        # predict_model = model_from_json(json_string)
        logger.info(f'Creating predict model: {model_name}')
        if model_name == 'small':
            _, predict_model = get_small_model()
        elif model_name == 'larger':
            _, predict_model = get_larger_model()
        else:
            _, predict_model = get_baseline_model()

        # load weights
        logger.info(f'Loading model weights: model/train_weights_{model_name}.h5')
        predict_model.load_weights(f'model/train_weights_{model_name}.h5')

        c = 0   # examples where the top-ranked answer is a good one
        c1 = 0  # accumulated reciprocal rank
        for i, d in enumerate(data):
            print(i, len(data))
            # pad the data and get it in the desired format
            answers, question = qa_data.process_data(d)

            # get the similarity scores
            sims = predict_model.predict([question, answers])

            n_good = len(d['good'])
            max_r = np.argmax(sims)
            max_n = np.argmax(sims[:n_good])
            r = rankdata(sims, method='max')
            c += 1 if max_r == max_n else 0
            c1 += 1 / float(r[max_r] - r[max_n] + 1)

        precision = c / float(len(data))
        mrr = c1 / float(len(data))
        print("Precision", precision)
        print("MRR", mrr)

    elif mode == 'test':
        # question and answers come from params
        qa_data = QAData()
        answers, question = qa_data.process_test_data(question, answers)

        # create the model from the saved JSON architecture
        logger.info(f'Loading model architecture: model/model_architecture_{model_name}.json')
        with open(f'model/model_architecture_{model_name}.json', 'r') as read_file:
            json_string = read_file.read()
        predict_model = model_from_json(json_string)

        # load weights
        logger.info(f'Loading model weights: model/train_weights_{model_name}.h5')
        predict_model.load_weights(f'model/train_weights_{model_name}.h5')

        # get the similarity scores
        sims = predict_model.predict([question, answers])
        max_r = np.argmax(sims)
        return max_r
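# Example call in 'test' mode (the question and answer strings are made up).
# Per the docstring, the return value indexes the most likely answer; this
# assumes the saved architecture and weight files for `model_name` exist.
best = main(mode='test',
            question='What is the capital of France?',
            answers=['Paris is the capital of France.',
                     'Berlin is the capital of Germany.'])
print('best answer index:', best)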
def run(gpu, args):
    rank = gpu
    dist.init_process_group(
        backend='nccl',
        init_method='env://',
        world_size=args.world_size,
        rank=rank
    )

    # seed everything for reproducibility
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    log_filename = "{}log-gpu{}.txt".format("" if args.do_train else "eval_", gpu)
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO,
                        handlers=[logging.FileHandler(os.path.join(args.output_dir, log_filename)),
                                  logging.StreamHandler()])
    logger = logging.getLogger(__name__)
    logger.info(args)
    logger.info(args.output_dir)

    if args.lm_format:
        tokenizer = GPT2TokenizerFast.from_pretrained('gpt2-large')
    else:
        tokenizer = BartTokenizer.from_pretrained("bart-large")

    if args.is_unifiedqa:
        dev_data = UnifiedQAData(logger, args, args.predict_file, is_training=False, lm_format=args.lm_format)
    else:
        dev_data = QAData(logger, args, args.predict_file, False)
    if not args.skip_inference:
        dev_data.load_dataset(tokenizer)
        dev_data.load_dataloader()

    if args.do_train:
        if args.is_unifiedqa:
            train_data = UnifiedQAData(logger, args, args.train_file, is_training=True, lm_format=args.lm_format)
        else:
            train_data = QAData(logger, args, args.train_file, True)
        train_data.load_dataset(tokenizer)
        # each process sees a distinct shard of the training data
        sampler = torch.utils.data.distributed.DistributedSampler(train_data,
                                                                  num_replicas=args.world_size,
                                                                  rank=rank)
        train_data.load_dataloader(sampler)

        if args.checkpoint is not None:
            if args.lm_format:
                model = MyGPT2.from_pretrained("gpt2-large", state_dict=torch.load(args.checkpoint))
                # model.parallelize()
            else:
                model = MyBart.from_pretrained("bart-large", state_dict=torch.load(args.checkpoint))
        else:
            if args.lm_format:
                model = MyGPT2.from_pretrained("gpt2-large")
                # model.parallelize()
            else:
                model = MyBart.from_pretrained("bart-large")

        # superseded by the DistributedDataParallel setup below:
        # if args.n_gpu > 1:
        #     model = torch.nn.DataParallel(model)
        # if args.n_gpu > 0:
        #     model.to(torch.device("cuda"))
        torch.cuda.set_device(gpu)
        model.cuda(gpu)

        # exclude biases and LayerNorm weights from weight decay
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in model.named_parameters()
                        if not any(nd in n for nd in no_decay)],
             'weight_decay': args.weight_decay},
            {'params': [p for n, p in model.named_parameters()
                        if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0},
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate, eps=args.adam_epsilon)
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=args.steps)
        model = torch.nn.parallel.DistributedDataParallel(model, device_ids=[gpu])
        train(args, logger, model, train_data, dev_data, optimizer, scheduler, rank)

    if args.do_predict:
        checkpoint = os.path.join(args.output_dir, 'best-model.pt') if args.checkpoint is None else args.checkpoint
        model = MyBart.from_pretrained("bart-large", state_dict=torch.load(checkpoint))
        logger.info("Loading checkpoint from {}".format(checkpoint))
        if args.n_gpu > 0:
            model.to(torch.device("cuda"))
        model.eval()
        ems = inference(model, dev_data, save_predictions=True)
        logger.info("%s on %s data: %.2f" % (dev_data.metric, dev_data.data_type, np.mean(ems) * 100))
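# Launcher sketch for the distributed run() above. init_method='env://' means
# MASTER_ADDR/MASTER_PORT must be set before init_process_group is called, and
# run(gpu, args) is shaped for torch.multiprocessing.spawn (one process per
# GPU). The address, port, and the world_size convention are assumptions.
import os
import torch.multiprocessing as mp

def launch(args):
    os.environ['MASTER_ADDR'] = 'localhost'   # assumed single-node setup
    os.environ['MASTER_PORT'] = '29500'       # any free port
    args.world_size = args.n_gpu              # one worker process per GPU
    mp.spawn(run, nprocs=args.n_gpu, args=(args,))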