def main():
    logger.info("Reading and creating arguments")
    args = read_arguments()

    logger.info("Reading Data")
    datasets = {}
    # read each split into tensors on the target device
    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split], args.alphabets,
                                                     args.device, symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Setting train and dev to %d samples' % args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(datasets, args.set_num_training_samples)

    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = build_model_and_optimizer(args)
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)

    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependency Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' % (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" % (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % args.num_epochs)
    print('\n')

    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'], args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print('Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, '
                  'decay rate=%.2f (schedule=%d, decay=%d)): ' %
                  (epoch, args.rnn_mode, args.opt, lr, args.epsilon, args.decay_rate,
                   args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0
            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'], args.batch_size, args.device, unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter):
                batch_num = batch_num + 1
                optimizer.zero_grad()
                # compute loss of main task
                word, char, pos, ner_tags, heads, arc_tags, auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                loss_arc, loss_arc_tag = model.loss(out_arc, out_arc_tag, heads, arc_tags,
                                                    mask=masks, length=lengths)
                loss = loss_arc + loss_arc_tag

                # update losses
                # number of scored tokens: mask sum minus one symbolic ROOT per sentence
                num_insts = masks.data.sum() - word.size(0)
                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts

                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave

                # update log
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, ' \
                               'arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain,
                                total_loss / total_train_inst,
                                total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst, time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()

            print('\n')
            print('train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, '
                  'arc_tag_loss: %.2f, time: %.2fs' %
                  (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst,
                   total_arc_loss / total_train_inst,
                   total_arc_tag_loss / total_train_inst, time.time() - start_time))

            # in-domain evaluation: tracks the best model/optimizer and the patience counter
            dev_eval_dict, test_eval_dict, best_model, best_optimizer, patient = \
                in_domain_evaluation(args, datasets, model, optimizer, dev_eval_dict,
                                     test_eval_dict, epoch, best_model, best_optimizer, patient)

            # patience-based schedule: rebuild the optimizer with a decayed learning rate
            if patient >= args.schedule:
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0

        print_results(test_eval_dict['in_domain'], 'test', args.domain, 'best_results')
        print('\n')
        for split in datasets.keys():
            evaluation(args, datasets[split], split, best_model, args.domain, epoch, 'best_results')
    else:
        logger.info("Evaluating")
        epoch = start_epoch
        # for split in ['train', 'dev', 'test']:
        for split in ['test']:
            eval_dict = evaluation(args, datasets[split], split, model, args.domain, epoch,
                                   'best_results')
            write_results(args, datasets[split], args.domain, split, model, args.domain, eval_dict)
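# The training loop above rebuilds its optimizer via generate_optimizer(args, lr,
# model.parameters()) whenever patience runs out. That helper is defined elsewhere in the
# repository; the sketch below is only an illustration of what such a factory might look
# like, assuming args.opt selects between 'adam' and 'sgd' and that args.epsilon and
# args.gamma carry the Adam eps and the L2 weight decay. It is not the project's actual
# implementation.
def generate_optimizer_sketch(args, lr, params):
    import torch
    params = list(params)
    if args.opt == 'adam':
        # hypothetical mapping: args.epsilon -> eps, args.gamma -> weight_decay
        return torch.optim.Adam(params, lr=lr, eps=args.epsilon, weight_decay=args.gamma)
    elif args.opt == 'sgd':
        return torch.optim.SGD(params, lr=lr, momentum=0.9, weight_decay=args.gamma)
    raise ValueError('unknown optimizer: %s' % args.opt)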
def main():
    logger.info("Reading and creating arguments")
    args = read_arguments()

    logger.info("Reading Data")
    datasets = {}
    for split in args.splits:
        dataset = prepare_data.read_data_to_variable(args.data_paths[split], args.alphabets,
                                                     args.device, symbolic_root=True)
        datasets[split] = dataset
    if args.set_num_training_samples is not None:
        print('Note the change here')
        print('dev set is not touched, similar to the test set')
        # print('Setting train and dev to %d samples' % args.set_num_training_samples)
        datasets = rearrange_splits.rearranging_splits(datasets, args.set_num_training_samples)

    logger.info("Creating Networks")
    num_data = sum(datasets['train'][1])
    #########################################################
    # Here constraints need to be added.
    model, optimizer, dev_eval_dict, test_eval_dict, start_epoch = build_model_and_optimizer(args)
    best_model = deepcopy(model)
    best_optimizer = deepcopy(optimizer)
    #########################################################

    logger.info('Training INFO of in domain %s' % args.domain)
    logger.info('Training on Dependency Parsing')
    logger.info("train: gamma: %f, batch: %d, clip: %.2f, unk replace: %.2f" %
                (args.gamma, args.batch_size, args.clip, args.unk_replace))
    logger.info('number of training samples for %s is: %d' % (args.domain, num_data))
    logger.info("dropout(in, out, rnn): (%.2f, %.2f, %s)" % (args.p_in, args.p_out, args.p_rnn))
    logger.info("num_epochs: %d" % args.num_epochs)
    print('\n')

    if not args.eval_mode:
        logger.info("Training")
        num_batches = prepare_data.calc_num_batches(datasets['train'], args.batch_size)
        lr = args.learning_rate
        patient = 0
        decay = 0
        for epoch in range(start_epoch + 1, args.num_epochs + 1):
            print('Epoch %d (Training: rnn mode: %s, optimizer: %s, learning rate=%.6f, eps=%.1e, '
                  'decay rate=%.2f (schedule=%d, decay=%d)): ' %
                  (epoch, args.rnn_mode, args.opt, lr, args.epsilon, args.decay_rate,
                   args.schedule, decay))
            model.train()
            total_loss = 0.0
            total_arc_loss = 0.0
            total_arc_tag_loss = 0.0
            total_train_inst = 0.0
            train_iter = prepare_data.iterate_batch_rand_bucket_choosing(
                datasets['train'], args.batch_size, args.device, unk_replace=args.unk_replace)
            start_time = time.time()
            batch_num = 0
            for batch_num, batch in enumerate(train_iter):
                batch_num = batch_num + 1
                optimizer.zero_grad()
                # compute loss of main task
                # word, pos, ner_tags, heads, arc_tags, auto_label, masks: [batch, seq_len], e.g. [16, 25]
                # char: [batch, seq_len, max_word_len], e.g. [16, 25, 29]
                # lengths: [batch], e.g. [16]
                # Why do we need auto_label?
                word, char, pos, ner_tags, heads, arc_tags, auto_label, masks, lengths = batch
                out_arc, out_arc_tag, masks, lengths = model.forward(
                    word, char, pos, mask=masks, length=lengths)
                # The decoder outputs a score s_ij, indicating the model's belief that the latter
                # token should be the head of the former.
                # out_arc: [16, 24, 24]
                # out_arc_tag_h: torch.Size([16, 24, 128])
                # out_arc_tag_c: torch.Size([16, 24, 128])
                # out_arc_tag = (out_arc_tag_h, out_arc_tag_c)
                loss_arc, loss_arc_tag = model.loss(out_arc, out_arc_tag, heads, arc_tags,
                                                    mask=masks, length=lengths)
                loss = loss_arc + loss_arc_tag

                # update losses
                num_insts = masks.data.sum() - word.size(0)
                total_arc_loss += loss_arc.item() * num_insts
                total_arc_tag_loss += loss_arc_tag.item() * num_insts
                total_loss += loss.item() * num_insts
                total_train_inst += num_insts

                # optimize parameters
                loss.backward()
                clip_grad_norm_(model.parameters(), args.clip)
                optimizer.step()

                time_ave = (time.time() - start_time) / batch_num
                time_left = (num_batches - batch_num) * time_ave

                # update log
                if batch_num % 50 == 0:
                    log_info = 'train: %d/%d, domain: %s, total loss: %.2f, arc_loss: %.2f, ' \
                               'arc_tag_loss: %.2f, time left: %.2fs' % \
                               (batch_num, num_batches, args.domain,
                                total_loss / total_train_inst,
                                total_arc_loss / total_train_inst,
                                total_arc_tag_loss / total_train_inst, time_left)
                    sys.stdout.write(log_info)
                    sys.stdout.write('\n')
                    sys.stdout.flush()

            print('\n')
            print('train: %d/%d, domain: %s, total_loss: %.2f, arc_loss: %.2f, '
                  'arc_tag_loss: %.2f, time: %.2fs' %
                  (batch_num, num_batches, args.domain,
                   total_loss / total_train_inst,
                   total_arc_loss / total_train_inst,
                   total_arc_tag_loss / total_train_inst, time.time() - start_time))

            dev_eval_dict, test_eval_dict, best_model, best_optimizer, patient, curr_dev_eval_dict = \
                in_domain_evaluation(args, datasets, model, optimizer, dev_eval_dict,
                                     test_eval_dict, epoch, best_model, best_optimizer, patient)

            # append the per-epoch training losses and the current dev evaluation to a log file
            store = {'total_loss': str(total_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'arc_loss': str(total_arc_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'arc_tag_loss': str(total_arc_tag_loss.cpu().numpy() / total_train_inst.cpu().numpy()),
                     'eval': curr_dev_eval_dict}
            #############################################
            str_file = args.full_model_name + '_' + 'all_epochs'
            with open(str_file, 'a') as f:
                f.write(str(store) + '\n')
            ###############################################

            if patient >= args.schedule:
                lr = args.learning_rate / (1.0 + epoch * args.decay_rate)
                optimizer = generate_optimizer(args, lr, model.parameters())
                print('updated learning rate to %.6f' % lr)
                patient = 0

        print_results(test_eval_dict['in_domain'], 'test', args.domain, 'best_results')
        print('\n')
        for split in datasets.keys():
            flag = False
            eval_dict = evaluation(args, datasets[split], split, best_model, args.domain,
                                   epoch, flag, 'best_results')
    else:
        logger.info("Evaluating")
        epoch = start_epoch
        # epoch = 70
        # print('Start epoch is', start_epoch)
        for split in ['train', 'dev', 'test']:
            flag = False
            eval_dict = evaluation(args, datasets[split], split, model, args.domain, epoch,
                                   flag, 'best_results')
            # print_results(eval_dict, split, args.domain, 'results')
            print('\n')
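# A standard entry point is assumed here so the file can be run directly as a training
# script; if the repository invokes main() from elsewhere, this guard is redundant.
if __name__ == '__main__':
    main()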