def main():
    ###############################################################################
    # Load data
    ###############################################################################
    dictionary = data.Dictionary()
    train_corpus = data.Corpus(dictionary)
    dev_corpus = data.Corpus(dictionary)
    test_corpus = data.Corpus(dictionary)

    task_names = ['snli', 'multinli'] if args.task == 'allnli' else [args.task]
    for task in task_names:
        skip_first_line = task == 'sick'
        train_corpus.parse(task, args.data, 'train.txt', args.tokenize,
                           num_examples=args.max_example,
                           skip_first_line=skip_first_line)
        if task == 'multinli':
            dev_corpus.parse(task, args.data, 'dev_matched.txt', args.tokenize)
            dev_corpus.parse(task, args.data, 'dev_mismatched.txt', args.tokenize)
            test_corpus.parse(task, args.data, 'test_matched.txt', args.tokenize,
                              is_test_corpus=False)
            test_corpus.parse(task, args.data, 'test_mismatched.txt',
                              args.tokenize, is_test_corpus=False)
        else:
            dev_corpus.parse(task, args.data, 'dev.txt', args.tokenize,
                             skip_first_line=skip_first_line)
            test_corpus.parse(task, args.data, 'test.txt', args.tokenize,
                              is_test_corpus=False,
                              skip_first_line=skip_first_line)

    print('train set size = ', len(train_corpus.data))
    print('development set size = ', len(dev_corpus.data))
    print('test set size = ', len(test_corpus.data))
    print('vocabulary size = ', len(dictionary))

    # Save the dictionary object to use during testing.
    helper.save_object(dictionary, args.save_path + args.task + '_dictionary.pkl')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))

    ###############################################################################
    # Build the model
    ###############################################################################
    model = SentenceClassifier(dictionary, embeddings_index, args)
    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                         **optim_params)
    best_acc = 0

    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_acc = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    ###############################################################################
    # Train the model
    ###############################################################################
    train = Train(model, optimizer, dictionary, embeddings_index, args, best_acc)
    bestmodel = train.train_epochs(train_corpus, dev_corpus, args.start_epoch,
                                   args.epochs)
    test_batches = helper.batchify(test_corpus.data, args.batch_size)
    if 'multinli' in task_names:
        print('Skipping evaluating best model. Evaluate using the test script.')
    else:
        test_accuracy, test_f1 = evaluate(bestmodel, test_batches, dictionary)
        print('accuracy: %.2f%%' % test_accuracy)
        print('f1: %.2f%%' % test_f1)
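
# For reference: `helper.get_optimizer(args.optimizer)` above is assumed to
# map an optimizer spec string to a (constructor, kwargs) pair, since its
# result is called as `optim_fn(params, **optim_params)`. A minimal,
# hypothetical sketch of such a helper (the 'name,key=value' spec format is
# an assumption, not the project's documented one):
import torch.optim as optim

def get_optimizer_sketch(spec):
    """Parse a spec like 'adam,lr=0.001' into (optimizer class, kwargs)."""
    parts = spec.split(',')
    name, kwargs = parts[0], {}
    for kv in parts[1:]:
        key, value = kv.split('=')
        kwargs[key] = float(value)
    optim_fns = {'sgd': optim.SGD, 'adam': optim.Adam,
                 'adadelta': optim.Adadelta}
    return optim_fns[name], kwargs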
size_p = (args.size_p, args.size_p)   # cropped local patch size
sub_batch_size = args.sub_batch_size  # batch size for training local patches

###################################
print("creating models......")
path_g = os.path.join(model_path, args.path_g)
path_g2l = os.path.join(model_path, args.path_g2l)
path_l2g = os.path.join(model_path, args.path_l2g)
model, global_fixed = create_model_load_weights(n_class, mode, evaluation,
                                                path_g=path_g,
                                                path_g2l=path_g2l,
                                                path_l2g=path_l2g)

###################################
num_epochs = args.num_epochs
learning_rate = args.lr
lamb_fmreg = args.lamb_fmreg
optimizer = get_optimizer(model, mode, learning_rate=learning_rate)
scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train))

##################################
criterion1 = FocalLoss(gamma=3)
criterion2 = nn.CrossEntropyLoss()
criterion3 = lovasz_softmax
criterion = lambda x, y: criterion1(x, y)
# criterion = lambda x, y: 0.5 * criterion1(x, y) + 0.5 * criterion3(x, y)
mse = nn.MSELoss()

if not evaluation:
    writer = SummaryWriter(log_dir=os.path.join(log_path, task_name))
    f_log = open(os.path.join(log_path, task_name + ".log"), 'w')
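
# For reference: the 'poly' policy passed to LR_Scheduler above is assumed to
# be the standard polynomial decay, lr = base_lr * (1 - iter / max_iter) ** p,
# commonly with p = 0.9. A minimal sketch under that assumption (hypothetical
# helper, not the project's own class):
def poly_lr(base_lr, cur_iter, max_iter, power=0.9):
    """Polynomial learning-rate decay from base_lr toward 0."""
    return base_lr * (1.0 - float(cur_iter) / max_iter) ** power

# Applied per iteration, e.g. over num_epochs * len(dataloader_train) steps:
# for param_group in optimizer.param_groups:
#     param_group['lr'] = poly_lr(learning_rate, cur_iter, max_iter)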
batch_size, kwargs)
nr_classes = 200

# Load the polars and update the train labels.
classpolars = torch.from_numpy(np.load(args.hpnfile)).float()
args.output_dims = int(args.hpnfile.split("/")[-1].split("-")[1][:-1])

# Load the model.
if args.network == "resnet32":
    model = resnet.ResNet(32, args.output_dims, 1, classpolars)
elif args.network == "densenet121":
    model = densenet.DenseNet121(args.output_dims, classpolars)
model = model.to(device)

# Load the optimizer.
optimizer = helper.get_optimizer(args.optimizer, model.parameters(),
                                 args.learning_rate, args.momentum, args.decay)

# Initialize the loss function.
f_loss = nn.CosineSimilarity(eps=1e-9).cuda()

# Main loop.
testscores = []
learning_rate = args.learning_rate
for i in xrange(args.epochs):
    print "---"

    # Learning rate decay.
    if i in [args.drop1, args.drop2]:
        learning_rate *= 0.1
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate
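
# For reference: with fixed class prototypes ("polars") and the cosine
# similarity above, the per-step loss in this setup is typically
# 1 - cos(output, polar of the target class). A minimal, hypothetical sketch
# of one training step under that assumption:
import torch.nn as nn

def hpn_step_sketch(model, optimizer, classpolars, x, y):
    """One training step against fixed class prototypes (assumed form)."""
    f_loss = nn.CosineSimilarity(eps=1e-9)
    output = model(x)                                # (batch, output_dims)
    loss = (1.0 - f_loss(output, classpolars[y])).mean()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss.item()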
print('development set size = ', len(dev_corpus.data))
print('test set size = ', len(test_corpus.data))
print('vocabulary size = ', len(dictionary))

# Save the dictionary object to use during testing.
helper.save_object(dictionary, args.save_path + 'dictionary.p')

embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                               args.word_vectors_file,
                                               dictionary.word2idx)
print('number of OOV words = ', len(dictionary) - len(embeddings_index))

###############################################################################
# Build the model
###############################################################################
model = SentenceClassifier(dictionary, embeddings_index, args)
optim_fn, optim_params = helper.get_optimizer(args.optimizer)
optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                     **optim_params)
best_acc = 0

# For training on multiple GPUs, use CUDA_VISIBLE_DEVICES=0,1 to specify
# which GPUs to use.
if 'CUDA_VISIBLE_DEVICES' in os.environ:
    cuda_visible_devices = [int(x) for x in
                            os.environ['CUDA_VISIBLE_DEVICES'].split(',')]
    if len(cuda_visible_devices) > 1:
        model = torch.nn.DataParallel(model, device_ids=cuda_visible_devices)
if args.cuda:
    model = model.cuda()

if args.resume:
    if os.path.isfile(args.resume):
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                               args.word_vectors_file,
                                               dictionary.word2idx)
print('number of OOV words = ', len(dictionary) - len(embeddings_index))

###############################################################################
# Build the model
###############################################################################
model = LSTM(dictionary, embeddings_index, args)
selector = Selector(dictionary, embeddings_index, args)
print(selector)
print(model)

optim_fn_selector, optim_params_selector = helper.get_optimizer(args.optimizer)
optimizer_selector = optim_fn_selector(
    filter(lambda p: p.requires_grad, selector.parameters()),
    **optim_params_selector)
optim_fn, optim_params = helper.get_optimizer(args.optimizer)
optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                     **optim_params)
best_acc = 0

param_dict_selector = helper.count_parameters(selector)
param_dict = helper.count_parameters(model)
print('number of trainable parameters = ',
      numpy.sum(list(param_dict_selector.values())),
      numpy.sum(list(param_dict.values())),
      numpy.sum(list(param_dict.values())) +
      numpy.sum(list(param_dict_selector.values())))
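
# For reference: `helper.count_parameters` above is assumed to return a
# {parameter name: element count} dict over trainable parameters, since its
# values are summed with numpy. A minimal, hypothetical sketch:
def count_parameters_sketch(model):
    """Map each trainable parameter name to its number of elements."""
    return {name: p.numel() for name, p in model.named_parameters()
            if p.requires_grad}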
model = model.to(device)

# To CUDA.
if args.multigpu == 1:
    model = torch.nn.DataParallel(model.cuda())
else:
    model = model.to(device)

# Network parameters.
optimname = args.optimizer
lr = args.learning_rate
momentum = args.momentum
decay = args.decay
params = model.parameters()

# Set the optimizer.
optimizer = helper.get_optimizer(optimname, params, lr, momentum, decay)

# Initialize the loss function.
f_loss = nn.CosineSimilarity(eps=1e-9).cuda()

resdir = args.resdir + "omniart/"
args.do_norm = 1
testscores = []

# Iterative optimization.
for i in xrange(args.epochs):
    print "---"

    # Update learning rate.
    if i in [args.drop1, args.drop2]:
        lr = lr * 0.1
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr
def main():
    # If the output directory doesn't exist, create it.
    if not os.path.exists(args.save_path):
        os.makedirs(args.save_path)

    # Set the random seed manually for reproducibility.
    numpy.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if torch.cuda.is_available():
        if not args.cuda:
            print("WARNING: You have a CUDA device, so you should probably run with --cuda")
        else:
            torch.cuda.manual_seed(args.seed)

    print('\ncommand-line params : {0}\n'.format(sys.argv[1:]))
    print('{0}\n'.format(args))

    ###############################################################################
    # Load data
    ###############################################################################
    dictionary = data.Dictionary()
    tasks = []
    train_dict, dev_dict = {}, {}

    if 'quora' in args.task:
        print('**Task name : Quora**')
        # Load the Quora dataset.
        quora_train = data.Corpus(args.data, dictionary)
        quora_train.parse('quora/train.txt', 'quora', args.tokenize,
                          args.max_example)
        print('Found {} pairs of train sentences.'.format(len(quora_train.data)))
        quora_dev = data.Corpus(args.data, dictionary)
        quora_dev.parse('quora/dev.txt', 'quora', args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(quora_dev.data)))
        quora_test = data.Corpus(args.data, dictionary)
        quora_test.parse('quora/test.txt', 'quora', args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(quora_test.data)))
        tasks.append(('quora', 2))
        train_dict['quora'] = quora_train
        dev_dict['quora'] = quora_dev

    if 'snli' in args.task:
        print('**Task name : SNLI**')
        # Load the SNLI dataset.
        snli_train = data.Corpus(args.data, dictionary)
        snli_train.parse('snli/train.txt', 'snli', args.tokenize,
                         args.max_example)
        print('Found {} pairs of train sentences.'.format(len(snli_train.data)))
        snli_dev = data.Corpus(args.data, dictionary)
        snli_dev.parse('snli/dev.txt', 'snli', args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(snli_dev.data)))
        snli_test = data.Corpus(args.data, dictionary)
        snli_test.parse('snli/test.txt', 'snli', args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(snli_test.data)))
        tasks.append(('snli', 3))
        train_dict['snli'] = snli_train
        dev_dict['snli'] = snli_dev

    if 'multinli' in args.task:
        print('**Task name : Multi-NLI**')
        # Load the MultiNLI dataset.
        multinli_train = data.Corpus(args.data, dictionary)
        multinli_train.parse('multinli/train.txt', 'multinli', args.tokenize,
                             args.max_example)
        print('Found {} pairs of train sentences.'.format(
            len(multinli_train.data)))
        multinli_dev = data.Corpus(args.data, dictionary)
        multinli_dev.parse('multinli/dev_matched.txt', 'multinli', args.tokenize)
        multinli_dev.parse('multinli/dev_mismatched.txt', 'multinli',
                           args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(multinli_dev.data)))
        multinli_test = data.Corpus(args.data, dictionary)
        multinli_test.parse('multinli/test_matched.txt', 'multinli',
                            args.tokenize)
        multinli_test.parse('multinli/test_mismatched.txt', 'multinli',
                            args.tokenize)
        print('Found {} pairs of test sentences.'.format(
            len(multinli_test.data)))
        tasks.append(('multinli', 3))
        train_dict['multinli'] = multinli_train
        dev_dict['multinli'] = multinli_dev

    if 'allnli' in args.task:
        print('**Task name : AllNLI**')
        # Load the AllNLI (SNLI + MultiNLI) dataset.
        allnli_train = data.Corpus(args.data, dictionary)
        allnli_train.parse('snli/train.txt', 'snli', args.tokenize,
                           args.max_example)
        allnli_train.parse('multinli/train.txt', 'multinli', args.tokenize,
                           args.max_example)
        print('Found {} pairs of train sentences.'.format(
            len(allnli_train.data)))
        allnli_dev = data.Corpus(args.data, dictionary)
        allnli_dev.parse('snli/dev.txt', 'snli', args.tokenize)
        allnli_dev.parse('multinli/dev_matched.txt', 'multinli', args.tokenize)
        allnli_dev.parse('multinli/dev_mismatched.txt', 'multinli',
                         args.tokenize)
        print('Found {} pairs of dev sentences.'.format(len(allnli_dev.data)))
        allnli_test = data.Corpus(args.data, dictionary)
        allnli_test.parse('snli/test.txt', 'snli', args.tokenize)
        allnli_test.parse('multinli/test_matched.txt', 'multinli',
                          args.tokenize)
        allnli_test.parse('multinli/test_mismatched.txt', 'multinli',
                          args.tokenize)
        print('Found {} pairs of test sentences.'.format(len(allnli_test.data)))
        tasks.append(('allnli', 3))
        train_dict['allnli'] = allnli_train
        dev_dict['allnli'] = allnli_dev

    print('\nvocabulary size = ', len(dictionary))

    # Save the dictionary object to use during testing.
    helper.save_object(dictionary, args.save_path + 'dictionary.p')

    embeddings_index = helper.load_word_embeddings(args.word_vectors_directory,
                                                   args.word_vectors_file,
                                                   dictionary.word2idx)
    print('number of OOV words = ', len(dictionary) - len(embeddings_index))

    ###############################################################################
    # Build the model
    ###############################################################################
    if not tasks:
        return

    model = MultitaskDomainAdapter(dictionary, embeddings_index, args, tasks)
    print(model)
    optim_fn, optim_params = helper.get_optimizer(args.optimizer)
    optimizer = optim_fn(filter(lambda p: p.requires_grad, model.parameters()),
                         **optim_params)
    best_accuracy = 0

    # For training on multiple GPUs, use CUDA_VISIBLE_DEVICES=0,1 to specify
    # which GPUs to use.
    if 'CUDA_VISIBLE_DEVICES' in os.environ:
        cuda_visible_devices = [
            int(x) for x in os.environ['CUDA_VISIBLE_DEVICES'].split(',')
        ]
        if len(cuda_visible_devices) > 1:
            model = torch.nn.DataParallel(model,
                                          device_ids=cuda_visible_devices)
    if args.cuda:
        model = model.cuda()

    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_accuracy = checkpoint['best_acc']
            model.load_state_dict(checkpoint['state_dict']['model'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    ###############################################################################
    # Train the model
    ###############################################################################
    train = Train(model, optimizer, dictionary, embeddings_index, args,
                  best_accuracy)
    train.set_train_dev_corpus(train_dict, dev_dict)
    train.train_epochs(args.start_epoch, args.epochs)
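
# For reference: the resume branch above reads 'epoch', 'best_acc',
# 'state_dict' (with a nested 'model' entry), and 'optimizer' keys from the
# checkpoint. A minimal, hypothetical sketch of the matching save side:
def save_checkpoint_sketch(path, epoch, best_acc, model, optimizer):
    """Persist training state in the layout the resume branch expects."""
    torch.save({
        'epoch': epoch,
        'best_acc': best_acc,
        'state_dict': {'model': model.state_dict()},
        'optimizer': optimizer.state_dict(),
    }, path)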
helper.log(logger,
           '[train] Shape of data placeholder {0}'.format(data.get_shape()))
helper.log(logger,
           '[train] Shape of label placeholder {0}'.format(label.get_shape()))

# Create the model.
with tf.name_scope('model'):
    model = model.Model(logger)

# Get the train op.
with tf.name_scope('train'):
    train_logit = model.logit(data, True, config.dropout)
    train_cost = helper.get_loss(train_logit, label)
    train_opt = helper.get_optimizer(config.learning_rate,
                                     config.optimizer).minimize(train_cost)
    train_pred = tf.argmax(tf.nn.softmax(train_logit), axis=1,
                           name='train_pred')
    train_equal = tf.equal(train_pred, label)
    train_acc = tf.reduce_mean(tf.cast(train_equal, tf.float32))
    train_summary_list = []
    train_summary_list.append(tf.summary.scalar('train_cost', train_cost))
    train_summary_list.append(tf.summary.scalar('train_acc', train_acc))
    train_summary_merge = tf.summary.merge(train_summary_list)

# Get the eval op.
with tf.name_scope('eval'):
    tf.get_variable_scope().reuse_variables()
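
# For reference: `helper.get_optimizer(learning_rate, name)` here returns a
# TF1 optimizer whose `minimize` is called on the cost. A minimal,
# hypothetical sketch of such a helper (the supported names are assumptions):
def get_optimizer_sketch(learning_rate, name):
    """Map an optimizer name to a tf.train optimizer instance."""
    optimizers = {
        'adam': tf.train.AdamOptimizer,
        'sgd': tf.train.GradientDescentOptimizer,
        'rmsprop': tf.train.RMSPropOptimizer,
    }
    return optimizers[name](learning_rate)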
print("creating models......") path_g = os.path.join(model_path, args.path_g) path_g2l = os.path.join(model_path, args.path_g2l) path_l2g = os.path.join(model_path, args.path_l2g) model, global_fixed = create_model_load_weights(n_class, mode, evaluation, path_g=path_g, path_g2l=path_g2l, path_l2g=path_l2g) model_ddp = DDP(model, device_ids=[local_rank], output_device=local_rank) ################################### num_epochs = args.num_epochs learning_rate = args.lr lamb_fmreg = args.lamb_fmreg optimizer = get_optimizer(model_ddp, mode, parallel=True, learning_rate=learning_rate) scheduler = LR_Scheduler('poly', learning_rate, num_epochs, len(dataloader_train)) ################################## criterion1 = FocalLoss(gamma=3) criterion2 = nn.CrossEntropyLoss() criterion3 = lovasz_softmax criterion = lambda x,y: criterion1(x, y) # criterion = lambda x,y: 0.5*criterion1(x, y) + 0.5*criterion3(x, y) mse = nn.MSELoss() if not evaluation: writer = SummaryWriter(logdir=log_path + task_name) f_log = open(log_path + task_name + ".log", 'w')