import torch
import torch.nn as nn
from torch.utils.data import DataLoader

# Apply command-line overrides to the config.
config['train']['max_iter'] = args.max_iter
if args.batch_size > 0:
    config['train']['batch_size'] = args.batch_size
num_classes = int(config['model']['num_classes'])
batch_size = int(config['train']['batch_size'])

# Fixed index split into train / validation / test sets.
train_data = VFDataset([1, 3, 5, 8])
valid_data = VFDataset([4, 6])
test_data = VFDataset([2, 7])
class_weights = get_class_weights(num_classes=num_classes,
                                  train_data=train_data,
                                  valid_data=valid_data)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Fall back to CPU when no GPU is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
Model = model_dict[args.model]
model = Model(n_classes=num_classes).to(device)
model = nn.DataParallel(model)

trainer = Trainer(model, config, train_loader, valid_loader, class_weights, device)
trainer.train(vis_data=test_data)

# Final evaluation on the held-out test split.
accuracy, precisions, recalls, IoUs = trainer.eval(test_loader)
print('accuracy: {:.3f}'.format(accuracy))
print_stats(precisions, recalls, IoUs)
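
# get_class_weights() is project-local and not shown above. A minimal sketch
# of what such a helper commonly computes -- inverse-frequency ("balanced")
# weights over the labels seen in the train and valid splits -- is given
# below. The argument names mirror the call site; the sketch name and the
# assumption that each sample is an (input, label-tensor) pair are mine.
def get_class_weights_sketch(num_classes, train_data, valid_data):
    counts = torch.zeros(num_classes)
    for dataset in (train_data, valid_data):
        for _, label in dataset:
            counts += torch.bincount(label.flatten().long(),
                                     minlength=num_classes).float()
    # weight_c = N / (C * n_c); clamp avoids division by zero for empty classes.
    return counts.sum() / (num_classes * counts.clamp(min=1.0))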
import os

from nltk.tokenize import TweetTokenizer

import preprocess

max_candidates = 80
max_candidates_len = 10
logger = preprocess.config_logger('main')
model_path = 'models/'
os.makedirs(model_path, exist_ok=True)
tokenize = TweetTokenizer().tokenize

# if not os.path.isfile('{}.preprocessed.pickle'.format(in_file)) \
#         or not os.path.isfile('{}.preprocessed.pickle'.format(dev_file)):
#     logger.error("Train file or dev file does not exist.")
#     exit()

if not evaluation_mode:
    logger.info('Loading preprocessed train and dev files.')
    dataset = MyDataset(in_file, range(10), use_elmo, max_nodes,
                        max_query_size, max_candidates, max_candidates_len)
    dev_dataset = MyDataset(dev_file, range(25, 30), use_elmo, max_nodes,
                            max_query_size, max_candidates, max_candidates_len)
    logger.info("Data prepared, train: %d, dev: %d." % (len(dataset), len(dev_dataset)))
    trainer = Trainer(dataset, dev_dataset, logger)
    trainer.train()
else:
    logger.info('Loading preprocessed evaluation data file.')
    # Reuse the dev split; evaluation uses the same dataset for both slots.
    dataset = MyDataset(dev_file, range(25, 30), use_elmo, max_nodes,
                        max_query_size, max_candidates, max_candidates_len)
    trainer = Trainer(dataset, dataset, logger)
    trainer.eval()
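
# preprocess.config_logger() is project-local; a minimal sketch of a logger
# factory with the same interface, built on the standard logging module, is
# shown below. The handler and format choices here are assumptions.
import logging

def config_logger_sketch(name):
    logger = logging.getLogger(name)
    logger.setLevel(logging.INFO)
    if not logger.handlers:
        handler = logging.StreamHandler()
        handler.setFormatter(
            logging.Formatter('%(asctime)s %(name)s %(levelname)s: %(message)s'))
        logger.addHandler(handler)
    return logger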
    return test_dataset


if __name__ == "__main__":
    # Restore the training-time configuration saved with the checkpoint.
    args = load_from_pickle('./checkpoint/config.pt')
    logger = get_logger(
        args.log_dir + "NER_Test_{}.txt".format(time.strftime("%m-%d_%H-%M-%S")))
    logger.info(args)

    logger.info('Loading embedding...')
    embedding = c_data_utils.load_embedding(args, args.vocab)
    embedding = torch.tensor(embedding, dtype=torch.float)
    logger.info('Loading embedding done.')

    # Rebuild the model with the pretrained word embedding and load its weights.
    trainer = Trainer(args, w_embedding=embedding)
    trainer.load()

    test_dataset = load_data(args, args.vocab)
    test_loader = DataLoader(dataset=test_dataset, batch_size=args.batch_size,
                             shuffle=False)
    weighted_f1, macro_f1, p, r, acc, during_time, pred_labels, gold_labels = \
        trainer.eval(test_dataset)
    logger.info(
        "Test Acc: {:.4f} P: {:.4f} R: {:.4f} F1: {:.4f} Time: {}".format(
            acc, p, r, weighted_f1, int(during_time)))
    logger.info(classification_report(gold_labels, pred_labels))
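
# trainer.eval() is project-local; most of the metric tuple it returns can be
# reproduced from pred_labels / gold_labels with sklearn. A sketch, assuming
# flat label lists (the helper name is hypothetical):
from sklearn.metrics import accuracy_score, f1_score, precision_recall_fscore_support

def compute_metrics_sketch(gold_labels, pred_labels):
    p, r, weighted_f1, _ = precision_recall_fscore_support(
        gold_labels, pred_labels, average='weighted', zero_division=0)
    macro_f1 = f1_score(gold_labels, pred_labels, average='macro', zero_division=0)
    acc = accuracy_score(gold_labels, pred_labels)
    return weighted_f1, macro_f1, p, r, acc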