def main():
    dataset = read_data_sets(args.data_dir, validation_size=args.validation_size)

    # init or load model
    rbm = RBM(args)
    if args.load:
        rbm.load(
            os.path.join(*[
                args.log_dir, args.run_name, 'ckpts',
                'model_ep' + str(args.ckpt_epoch),
                'model_ep' + str(args.ckpt_epoch)
            ]))

    # create directories if not already made
    if not os.path.exists(os.path.join(args.log_dir, args.run_name)):
        os.mkdir(os.path.join(args.log_dir, args.run_name))
    else:
        if args.save:
            print("Warning: might overwrite previous samples & ckpts")
    if not os.path.exists(os.path.join(*[args.log_dir, args.run_name, 'ckpts'])):
        os.mkdir(os.path.join(*[args.log_dir, args.run_name, 'ckpts']))
    if not os.path.exists(os.path.join(*[args.log_dir, args.run_name, 'samples'])):
        os.mkdir(os.path.join(*[args.log_dir, args.run_name, 'samples']))

    # train and/or eval
    if args.train:
        train(args, model=rbm, data=dataset)
    if args.eval:
        evaluate(args, model=rbm, data=dataset)
def train_text_cnn(argv=None):
    # Load dataset
    train_dl, valid_dl, test_dl, TEXT, _ = get_dataloaders(SEED, args)

    # Create net
    filter_sizes = [int(i) for i in args.filter_sizes.split(',')]
    num_vocab = len(TEXT.vocab)
    EMB_DIM = 100
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    output_dim = 2
    print('Dictionary size: {}'.format(num_vocab))
    text_cnn = TextCNN(num_vocab, EMB_DIM, args.num_filters, filter_sizes,
                       output_dim, args.dropout_r, pad_idx).to(args.device)

    # Load the pretrained embeddings
    pretrained_embeddings = TEXT.vocab.vectors
    text_cnn.embedding.weight.data.copy_(pretrained_embeddings)

    # Zero-initialize the unknown-word and padding embeddings
    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    text_cnn.embedding.weight.data[unk_idx] = torch.zeros(EMB_DIM)
    text_cnn.embedding.weight.data[pad_idx] = torch.zeros(EMB_DIM)
    # Freeze the embedding layer (must be set on the weight tensor, not the module)
    text_cnn.embedding.weight.requires_grad = False

    # Set up loss and optimizer
    loss_func = torch.nn.CrossEntropyLoss()
    acc_func = categorical_accuracy
    opt = torch.optim.Adam(text_cnn.parameters(), lr=args.lr)

    # Start training
    for epoch in range(args.epoch):
        train_single_epoch(text_cnn, loss_func, acc_func, train_dl, opt, epoch)
        evaluate(text_cnn, loss_func, acc_func, test_dl, epoch)
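# categorical_accuracy is passed to the training loop above but not shown here.
# A minimal sketch, assuming it takes raw logits and integer class labels and
# returns the batch accuracy as a tensor (a hypothetical stand-in for the real helper):
import torch

def categorical_accuracy(preds, y):
    top_pred = preds.argmax(dim=1, keepdim=True)      # predicted class per example
    correct = top_pred.eq(y.view_as(top_pred)).sum()  # number of correct predictions
    return correct.float() / y.shape[0]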
def trainer(model, train_dl, test_dl, data_id, config, params):
    # criterion, optimizer, scheduler
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=params['lr'])
    scheduler = StepLR(optimizer, step_size=10, gamma=0.8)
    target_names = ['Healthy', 'D1', 'D2', 'D3', 'D4', 'D5', 'D6', 'D7',
                    'D8', 'D9', 'D10']

    for epoch in range(params['pretrain_epoch']):
        start_time = time.time()
        train_loss, train_pred, train_labels = train(model, train_dl, optimizer,
                                                     criterion, config)
        scheduler.step()

        # log time
        end_time = time.time()
        epoch_mins, epoch_secs = epoch_time(start_time, end_time)

        # print results
        print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
        print(f'\tTrain Loss: {train_loss:.3f}')

        # Evaluate on the test set
        test_loss, _, _ = evaluate(model, test_dl, criterion, config)
        print('=' * 89)
        print(f'\t Performance on test set::: Loss: {test_loss:.3f} ')  # | Score: {test_score:7.3f}

        train_labels = torch.stack(train_labels).view(-1)
        train_pred = torch.stack(train_pred).view(-1)
        print(classification_report(train_labels, train_pred,
                                    target_names=target_names))

        # saving last-epoch model
        # checkpoint1 = {'model': model,
        #                'epoch': epoch,
        #                'state_dict': model.state_dict(),
        #                'optimizer': optimizer.state_dict()}
        # torch.save(checkpoint1,
        #            f'./checkpoints/{config["model_name"]}/pretrained_{config["model_name"]}_{data_id}_tuned.pt')

    # Final evaluation on the test set
    test_loss, y_pred, y_true = evaluate(model, test_dl, criterion, config)
    print('=' * 89)
    print(f'\t Performance on test set:{data_id}::: Loss: {test_loss:.3f} ')  # | Score: {test_score:7.3f}
    print('=' * 89)

    y_true = torch.stack(y_true).view(-1)
    y_pred = torch.stack(y_pred).view(-1)
    print(classification_report(y_true, y_pred, target_names=target_names))

    print('| End of Pre-training |')
    print('=' * 89)
    return model
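# epoch_time is used in this and several later snippets but never defined here.
# A minimal sketch, assuming it splits an elapsed interval into whole minutes and
# remaining seconds (the actual helper may differ):
def epoch_time(start_time, end_time):
    elapsed = end_time - start_time
    elapsed_mins = int(elapsed // 60)
    elapsed_secs = int(elapsed - elapsed_mins * 60)
    return elapsed_mins, elapsed_secs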
def main(config):
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)

    print("\t \t \t the model name is {}".format(config.model_name))

    device, n_gpu = get_device()

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        # make cuDNN use deterministic algorithms so results are reproducible
        torch.backends.cudnn.deterministic = True

    '''Data preparation'''
    text_field = data.Field(tokenize='spacy',
                            lower=True,
                            include_lengths=True,
                            fix_length=config.sequence_length)
    label_field = data.LabelField(dtype=torch.long)

    train_iterator, dev_iterator, test_iterator = load_sst2(
        config.data_path, text_field, label_field, config.batch_size, device,
        config.glove_word_file, config.cache_path)

    '''Word-embedding preparation'''
    pretrained_embeddings = text_field.vocab.vectors
    model_file = config.model_dir + 'model1.pt'

    '''Model preparation'''
    if config.model_name == 'TextRNN':
        from TextRNN import TextRNN
        model = TextRNN.TextRNN(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    if config.do_train:
        train(config.epoch_num, model, train_iterator, dev_iterator, optimizer,
              criterion, ['0', '1'], model_file, config.log_dir,
              config.print_step, 'word')

    model.load_state_dict(torch.load(model_file))
    test_loss, test_acc, test_report = evaluate(model, test_iterator, criterion,
                                                ['0', '1'], 'word')
    print("-------------- Test -------------")
    print("\t Loss: {} | Acc: {} | Macro avg F1: {} | Weighted avg F1: {}".format(
        test_loss, test_acc, test_report['macro avg']['f1-score'],
        test_report['weighted avg']['f1-score']))
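# get_device is called above (and with a GPU id in a later snippet) but not defined
# here. A minimal sketch, assuming it returns the torch device plus the number of
# visible GPUs (a hypothetical stand-in for the project's helper):
import torch

def get_device(gpu_id=0):
    n_gpu = torch.cuda.device_count()
    device = torch.device(f'cuda:{gpu_id}' if n_gpu > 0 else 'cpu')
    return device, n_gpu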
def generateHotWords(self, train_data, train_labels, train_no=None,
                     method="blank", max_batch=256):
    """
    Yields the change in classifier output per word, based on blanking words
    in each review.

    train_data: array of sentences
    train_labels: array of ints
    train_no: int, limit the number of reviews to process; defaults to the whole array
    method: str, one of "blank" or "synonym"; either blanks words or replaces
        them with a synonym
    max_batch: maximum batch size to evaluate on
    """
    if not train_no:
        train_no = len(train_data)

    model = self.model
    tokenizer = self.tokenizer
    labels = train_labels

    blank_review_iterator = self.createBlankedReviews(
        train_data[0:train_no], method=method)

    for i, blanks in enumerate(blank_review_iterator):
        # Unpack the blanked reviews and words
        base_review, blanked_reviews, blanked_words = blanks
        blanked_words = np.array(blanked_words)

        # Set up the evaluation data
        all_reviews = np.array([base_review, *blanked_reviews])
        true_labels = np.full(len(all_reviews), labels[i])
        evaluation_data, _ = train_eval.ReviewDataset.setUpData(
            all_reviews, true_labels, tokenizer)

        # Run the model to get softmax outputs
        batch = min([len(all_reviews), max_batch])
        _, return_pred_labels, sm, _, _ = train_eval.evaluate(
            model, evaluation_data, batch_size=batch, return_pred_labels=True)
        return_pred_labels = np.array(return_pred_labels)[0, :]

        # Keep the total change in softmax output caused by blanking each word
        gross_change = np.sum(sm[0][1:, :] - sm[0][0, :], axis=1)

        # misclassify_words = np.where(return_pred_labels[1:] != return_pred_labels[0])
        # hot_words.append(blanked_words[misclassify_words])

        if i % 50 == 0:
            print("{}: Reviews complete: {}".format(time.ctime(), i))

        yield blanked_words, gross_change
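# createBlankedReviews is not shown in this listing. A minimal sketch of what its
# "blank" mode presumably does: for each review, yield the original plus one variant
# per word with that word blanked out. The real method (and its "synonym" mode) may
# differ; this standalone function only mirrors the shape of its return values.
def create_blanked_reviews(reviews, method="blank", blank_token=""):
    for review in reviews:
        words = review.split()
        blanked_reviews, blanked_words = [], []
        for i, word in enumerate(words):
            variant = words[:i] + [blank_token] + words[i + 1:]
            blanked_reviews.append(" ".join(variant))
            blanked_words.append(word)
        yield review, blanked_reviews, blanked_words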
def generateAttentions(self, train_data, train_labels, max_batch=256):
    """Return per-word attentions for each review."""
    self.model.config.output_attentions = True
    self.evaluation_data, _ = train_eval.ReviewDataset.setUpData(
        train_data, train_labels, self.tokenizer)
    _, _, _, attentions, _ = train_eval.evaluate(self.model,
                                                 self.evaluation_data,
                                                 batch_size=max_batch,
                                                 return_attentions=True)
    self.model.config.output_attentions = False
    return attentions
def single_train(config, index):
    from global_constants import ConfigEnums, main_device
    ce = ConfigEnums

    save_path = config[ce.save_path]
    save_model = config[ce.save_model]
    config[ce.save_path] = config[ce.save_path] if config[ce.save_model] else None
    config[ce.model] = config[ce.model].to(main_device)
    final_logger = loggers.final_logger

    train_params = get_params(config, train)
    new_model, train_losses = train(**train_params)
    new_model = get_module_from_parallel(new_model)

    config[ce.dataset] = config[ce.evalset]
    eval_params = get_params(config, evaluate)
    perplexity, perplexities, eval_losses = evaluate(**eval_params)

    refuse = False
    loss = torch.mean(eval_losses)
    log_info(final_logger, 'final mean loss {}'.format(loss))
    # if loss > config[ce.prev_eval_loss]:
    #     new_model.load_state_dict(model_state)
    #     refuse = True
    #     log_info(final_logger, 'loss {} is high, refused'.format(index))
    #     loss = config[ce.prev_eval_loss]
    # else:
    #     config[ce.prev_eval_loss] = loss

    if save_path is not None:
        if save_model and not refuse:
            new_model = get_module_from_parallel(new_model)
            tokenizer = get_module_from_parallel(config[ce.tokenizer])
            log_info(final_logger, 'saving trained models: ' + save_path)
            new_model.save_pretrained(save_path)
            tokenizer.save_pretrained(save_path)

        log_path = list(os.path.split(save_path)[:-1])
        log_path.append('log')
        log_path.append(str(index) + '/')
        log_path = '/'.join(log_path)
        if not os.path.exists(log_path):
            os.mkdir(log_path)

        log_info(final_logger, 'saving training losses')
        torch.save(train_losses, log_path + 'train_losses.pt')
        log_info(final_logger, 'saving evaluation losses')
        torch.save(eval_losses, log_path + 'eval_losses.pt')
        torch.save(perplexity, log_path + 'perplexity.pt')
        torch.save(perplexities, log_path + 'perplexities.pt')
        log_info(final_logger,
                 'mean eval losses {}'.format(torch.mean(eval_losses)))
        log_info(final_logger, 'All saved')

    return new_model, loss
            evaluateL2, evaluateL1, args)
        print(
            '| end of epoch {:3d} | time used: {:5.2f}s | train_loss {:5.4f} | valid rse {:5.4f} | valid rae {:5.4f} | valid corr {:5.4f}'
            .format(epoch, (time.time() - epoch_start_time), train_loss,
                    val_loss, val_rae, val_corr))

        if val_loss < best_val:
            with open(args.save, 'wb') as f:
                torch.save(model, f)
            best_val = val_loss

        if epoch % 10 == 0:
            test_acc, test_rae, test_corr = eval_method(
                Data, Data.test[0], Data.test[1], model, evaluateL2,
                evaluateL1, args)
            print(
                "| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}\n".
                format(test_acc, test_rae, test_corr))

except KeyboardInterrupt:
    print('-' * 89)
    print('Exiting from training early')

# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)

test_acc, test_rae, test_corr = evaluate(Data, Data.test[0], Data.test[1],
                                         model, evaluateL2, evaluateL1, args)
print('Best model performance:')
print("| test rse {:5.4f} | test rae {:5.4f} | test corr {:5.4f}".format(
    test_acc, test_rae, test_corr))
def main(config):
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)

    print("\t \t \t the model name is {}".format(config.model_name))

    device, n_gpu = get_device()

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        # make cuDNN use deterministic algorithms so results are reproducible
        torch.backends.cudnn.deterministic = True

    """ SST-2 data preparation """
    CHAR_NESTING = data.Field(tokenize=list, lower=True)
    char_field = data.NestedField(CHAR_NESTING,
                                  tokenize='spacy',
                                  fix_length=config.sequence_length)
    word_field = data.Field(tokenize='spacy',
                            lower=True,
                            include_lengths=True,
                            fix_length=config.sequence_length)
    label_field = data.LabelField(dtype=torch.long)

    train_iterator, dev_iterator, test_iterator = sst_word_char(
        config.data_path, word_field, char_field, label_field,
        config.batch_size, device, config.glove_word_file,
        config.glove_char_file)

    """ Embedding preparation """
    word_embeddings = word_field.vocab.vectors
    char_embeddings = char_field.vocab.vectors
    model_file = config.model_dir + 'model1.pt'

    """ Model preparation """
    if config.model_name == "TextRNNHighway":
        model = TextRNNHighway.TextRNNHighway(
            config.glove_word_dim, config.glove_char_dim, config.output_dim,
            config.hidden_size, config.num_layers, config.bidirectional,
            config.dropout, word_embeddings, char_embeddings,
            config.highway_layers)
    elif config.model_name == "TextCNNHighway":
        filter_sizes = [int(val) for val in config.filter_sizes.split()]
        model = TextCNNHighway.TextCNNHighway(
            config.glove_word_dim, config.glove_char_dim, config.filter_num,
            filter_sizes, config.output_dim, config.dropout, word_embeddings,
            char_embeddings, config.highway_layers)
    elif config.model_name == "LSTMATTHighway":
        model = LSTMATTHighway.LSTMATTHighway(
            config.glove_word_dim, config.glove_char_dim, config.output_dim,
            config.hidden_size, config.num_layers, config.bidirectional,
            config.dropout, word_embeddings, char_embeddings,
            config.highway_layers)
    elif config.model_name == "TextRCNNHighway":
        model = TextRCNNHighway.TextRCNNHighway(
            config.glove_word_dim, config.glove_char_dim, config.output_dim,
            config.hidden_size, config.num_layers, config.bidirectional,
            config.dropout, word_embeddings, char_embeddings,
            config.highway_layers)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()

    model = model.to(device)
    criterion = criterion.to(device)

    if config.do_train:
        train(config.epoch_num, model, train_iterator, dev_iterator, optimizer,
              criterion, ['0', '1'], model_file, config.log_dir,
              config.print_step, 'highway')

    model.load_state_dict(torch.load(model_file))
    criterion = nn.CrossEntropyLoss()
    test_loss, test_acc, test_report = evaluate(model, test_iterator, criterion,
                                                ['0', '1'], 'highway')
    print("-------------- Test -------------")
    print(
        "\t Loss: {} | Acc: {} | Micro avg F1: {} | Macro avg F1: {} | Weighted avg F1: {}"
        .format(test_loss, test_acc, test_report['micro avg']['f1-score'],
                test_report['macro avg']['f1-score'],
                test_report['weighted avg']['f1-score']))
config = {
    "n": 10,
    "lambda": 1,
    "n_iters": 3,
    "norm": False,
    "base_k": 'subtree'
}

### Load Data
if not args['split_ready']:
    X_train, Y_train, X_test, Y_test = get_dataset(args)
elif args['eval_on_valid']:
    X_train, Y_train = get_splitted(args['trainfile'])
    X_test, Y_test = get_splitted(args['validfile'])
else:
    X_train, Y_train = get_splitted(args['trainfile'])
    X_valid, Y_valid = get_splitted(args['validfile'])
    if X_train is not None and X_valid is not None:
        X_train = np.concatenate([X_train, X_valid])
    if Y_train is not None and Y_valid is not None:
        Y_train = np.concatenate([Y_train, Y_valid])
    X_test, Y_test = get_splitted(args['testfile'])

### Run Model
if args['mode'] in ['train', 'train_eval']:
    train_eval(config, args, X_train, Y_train, X_test, Y_test)
elif args['mode'] == 'evaluate':
    evaluate(args, X_test, Y_test)
else:
    predict(args, X_test)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", default='bert', type=str,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default='bert-base-uncased', type=str,
                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
    parser.add_argument("--output_dir", default='../output_mc', type=str,
                        help="The output directory where the model checkpoints and predictions will be written.")
    parser.add_argument("--raw_data_dir", default='../data_mc', type=str)
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                        "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--task_name", default='DREAM')
    parser.add_argument("--pre_model_dir", default='2020-03-12-10-58-checkpoint-3048')
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_test", action='store_true',
                        help="Whether to run test on the test set.")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Run evaluation during training at each logging step.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=3e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument('--gradient_accumulation_steps', type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=2.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Overrides num_train_epochs.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--verbose_logging", action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. "
                        "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with the step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument('--overwrite_output_dir', action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument('--overwrite_cache', action='store_true',
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument('--seed', type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--fp16', action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument('--fp16_opt_level', type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
                        "See details at https://nvidia.github.io/apex/amp.html")
    args = parser.parse_args()

    args.checkpoint = os.path.join(args.output_dir, args.pre_model_dir)
    if os.path.exists(args.output_dir) and os.listdir(
            args.output_dir) and args.do_train and not args.overwrite_output_dir:
        logger.info(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available()
                              and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Set seed
    set_seed(args)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path)
    config.output_hidden_states = True
    config.num_options = int(
        MULTIPLE_CHOICE_TASKS_NUM_LABELS[args.task_name.lower()])
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)

    post_model = Post_MV(args, config)
    post_model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training.")

    if args.do_train:
        logging.getLogger("transformers.tokenization_utils").setLevel(
            logging.ERROR)  # Reduce logging
        train_dataset = load_and_cache_examples(args,
                                                task=args.task_name,
                                                tokenizer=tokenizer,
                                                evaluate=False)
        global_step, tr_loss = train_process(args, train_dataset, post_model,
                                             tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    if args.do_test:
        logging.getLogger("transformers.modeling_utils").setLevel(
            logging.WARN)  # Reduce logging
        logging.getLogger("transformers.configuration_utils").setLevel(
            logging.WARN)  # Reduce logging
        logging.getLogger("transformers.tokenization_utils").setLevel(
            logging.ERROR)  # Reduce logging
        checkpoint = os.path.join(args.output_dir, args.pre_model_dir + '-TIE')
        logger.info(" Load model from %s", checkpoint)
        post_model.load_state_dict(
            torch.load(os.path.join(checkpoint, 'pytorch_model.bin')))
        post_model.to(args.device)
        task_string = [
            '', '-Add1OtherTruth2Opt', '-Add2OtherTruth2Opt',
            '-Add1PasSent2Opt', '-Add1NER2Pass'
        ]
        task_string = [args.task_name + item for item in task_string]
        result = evaluate(args, task_string, post_model, tokenizer, test=True)
    ax = fig.add_subplot(111)
    ax.plot(true_data, label='True Data')
    ax.plot(predicted_data, label='Prediction')
    plt.legend()
    plt.show()
    figname = str(args.model) + "_" + str(args.horizon) + "_" + str(
        args.data[-8:-5]) + ".png"
    plt.savefig(figname)
    plt.close()


# Load the best saved model.
with open(args.save, 'rb') as f:
    model = torch.load(f)

test_acc, test_rae, test_corr, predict = evaluate(Data, Data.test[0],
                                                  Data.test[1], model,
                                                  evaluateL2, evaluateL1, args,
                                                  True)
# if (args.model == "LSTNet"):
#     cnn = model.returnCNN()
#     temp = cnn[0, 0, :, 0]
#     print(temp)
#     fig2 = plt.figure()
#     plt.plot(temp)
#     plt.savefig("cnn.png")
#     plt.close()

pred = np.array(predict[:, 0])
base = np.array(Data.test[1].data.cpu().numpy()[:, 0])
plot_results(pred, base)
        args.weight_decay).cuda()
    embedding_optimizer = torch.optim.Adagrad(model.embedding_parameters(), args.lr)
    mlp_optimizer = torch.optim.Adagrad(model.mlp_parameters(), args.lr)

    losses = []
    for train_epoch in range(args.train_epochs):
        if dim == 2:
            loss = train_single(train_queue, model, embedding_optimizer, args)
            if train_epoch % 15000 == 0:
                _ = train_single(valid_queue, model, mlp_optimizer, args)
            _ = train_single(valid_queue, model, mlp_optimizer, args)
            rmse = evaluate(model, test_queue)
        elif dim == 3:
            loss = train_single_triple(train_queue, model, embedding_optimizer, args)
            _ = train_single_triple(valid_queue, model, mlp_optimizer, args)
            rmse = evaluate_triple(model, test_queue)
        losses.append(loss)
        logging.info('train_epoch: %d, loss: %.4f, rmse: %.4f[%.4f]' %
                     (train_epoch, loss, rmse, time() - start))

elif args.mode == 'random_nas':
    search_start = time()
    performance = {}
    best_arch, best_rmse = None, 100000
mnist_model = gf.move_to_device(mnist_model, device)

# Train the MLP model
history = te.train_model(model=mnist_model,
                         epochs=n_epochs,
                         lr=0.01,
                         train_loader=train_loader,
                         val_loader=test_loader,
                         opt_func=torch.optim.SGD)

# Visualize loss and accuracy history
pl.plot_accuracy(history)
pl.plot_losses(history)

# Evaluate the final model
scores = te.evaluate(model=mnist_model, val_loader=test_loader)
print('Test scores: ', scores)

# Predict on a few inputs
test_dataset = MNIST(root='data/', train=False, transform=transforms.ToTensor())

x, label = test_dataset[0]
x = x.unsqueeze(0)
pred = te.predict(x=x, model=mnist_model)
print('True label: {}, Predicted: {}'.format(label, pred))

x, label = test_dataset[111]
x = x.unsqueeze(0)
pred = te.predict(x=x, model=mnist_model)
print('True label: {}, Predicted: {}'.format(label, pred))
def main():
    logging.basicConfig(filename='logs' + os.sep + 'example.log',
                        level=logging.DEBUG)

    data_transforms = {
        'train': T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'val': T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ]),
        'test': T.Compose([
            T.ToOriginalHU(INTENSITY_OFFSET),
            T.IntensityWindowing(WINDOWING),
            T.SpacingResize(NORM_SPACING, MAX_SIZE),
            T.ToTensor()
        ])
    }

    logging.info('Loading data sets')
    image_datasets = {
        x: DeepLesion(DIR_IN + os.sep + x, GT_FN_DICT[x], data_transforms[x])
        for x in ['train', 'val', 'test']
    }
    logging.info('data sets loaded')

    logging.info('Loading data loaders')
    dl_dataloaders = {
        x: DataLoader(image_datasets[x],
                      batch_size=3,
                      shuffle=True,
                      num_workers=0,
                      collate_fn=BatchCollator)
        for x in ['train', 'val', 'test']
    }
    logging.info('data loaders loaded\n')

    dl_dataset_sizes = {
        x: len(image_datasets[x]) for x in ['train', 'val', 'test']
    }

    # for batch_id, (inputs, targets) in enumerate(dl_dataloaders['train']):
    #     for i, (image, target) in enumerate(zip(inputs, targets)):
    #         img_copy = image.squeeze().numpy()
    #         images = [img_copy] * 3
    #         images = [im.astype(float) for im in images]
    #         img_copy = cv2.merge(images)
    #         for j, (bbox, pseudo_mask) in enumerate(zip(target["boxes"], target["masks"])):
    #             bbox = target["boxes"][j].squeeze().numpy()
    #             bbox = np.int16(bbox)
    #             mask = target["masks"][j].squeeze().numpy()
    #             cv2.rectangle(img_copy, (bbox[0], bbox[1]), (bbox[2], bbox[3]), (0, 255, 0), 1)
    #             msk_idx = np.where(mask == 1)
    #             img_copy[msk_idx[0], msk_idx[1], 0] = 255
    #         cv2.imshow(str(batch_id) + " " + str(i), img_copy)
    # cv2.waitKey(0)
    # cv2.destroyAllWindows()

    dl_model = get_model(False, True, 2)
    params = [p for p in dl_model.parameters() if p.requires_grad]

    # Observe that not all parameters are being optimized
    optimizer_ft = SGD(params, lr=0.001, momentum=0.9, weight_decay=0.0001)
    # optimizer_ft = Adam(params, lr=0.001)

    # Decay LR by a factor of 0.1 every 7 epochs
    # exp_lr_scheduler = lr_scheduler.StepLR(optimizer_ft, step_size=4, gamma=0.1)
    # exp_lr_scheduler = lr_scheduler.CosineAnnealingLR(optimizer_ft, T_max=100)

    num_epochs = 10
    since = time.time()
    # best_model_wts = copy.deepcopy(dl_model.state_dict())
    # best_llf = 0
    # best_nlf = 999

    logging.info('momentum:' +
                 str(optimizer_ft.state_dict()['param_groups'][0]['momentum']))
    logging.info('weight_decay:' +
                 str(optimizer_ft.state_dict()['param_groups'][0]['weight_decay']))
    # logging.info('LR decay gamma:' + str(exp_lr_scheduler.state_dict()['gamma']))
    # logging.info('LR decay step size:' + str(exp_lr_scheduler.state_dict()['step_size']) + '\n')

    for epoch in range(num_epochs):
        # deep_copy_flag = False
        logging.info('Epoch {}/{}'.format(epoch, num_epochs - 1))
        logging.info('-' * 20)

        train_one_epoc(dl_model, optimizer_ft, dl_dataloaders['train'],
                       dl_dataset_sizes['train'])
        llf, nlf = evaluate(dl_model, dl_dataloaders['val'])
        logging.info('LLF: {}'.format(llf))
        logging.info('NLF: {}'.format(nlf) + '\n')
        # exp_lr_scheduler.step()

        # if llf > best_llf:
        #     deep_copy_flag = True
        #     best_nlf = nlf
        #     best_llf = llf
        # elif (llf == best_llf) & (nlf < best_nlf):
        #     deep_copy_flag = True
        #     best_nlf = nlf
        # if deep_copy_flag:
        best_model_wts = copy.deepcopy(dl_model.state_dict())
        torch.save(best_model_wts,
                   'saved_models' + os.sep + str(epoch) + '_deeplesion.pth')

    time_elapsed = time.time() - since
    logging.info('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
training = 1
file_name = "with_attention_plot.png"

source_vocab, target_vocab, train_pairs, eval_pairs = utils.prepare_data()

if load_model:
    print("loading model...")
    encoder = torch.load(config.ENCODER_PATH)
    decoder = torch.load(config.DECODER_PATH)
else:
    print("initializing model...")
    encoder = model.EncoderRNN(source_vocab.n_words, config.HIDDEN_SIZE)
    decoder = model.AttnDecoderRNN(target_vocab.n_words, config.HIDDEN_SIZE)

if training:
    train_loss_history, train_acc_history, eval_loss_history, eval_acc_history \
        = train_eval.trainIters(encoder, decoder, source_vocab, target_vocab,
                                train_pairs, eval_pairs)
    utils.show_and_save_plot(train_loss_history, train_acc_history,
                             eval_loss_history, eval_acc_history, file_name)

sentence = "BOOL && BOOL && BOOL"
translation, attention = train_eval.evaluate(encoder, decoder, source_vocab,
                                             target_vocab, sentence,
                                             max_length=config.MAX_LENGTH)
translation = utils.print_from_list(translation)
print(translation)
from train_eval import train_loop, evaluate
from utils import get_dataloaders

if __name__ == "__main__":
    # Load the datasets and the vocabulary used
    init(config)

    # Get dataloaders for the train/validation/test sets
    train_loader, valid_loader, test_loader = get_dataloaders(
        config['train'], config['val'], config['test'])

    if net_config.mode == "train":
        # Create a fresh instance of the RNN
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Train the network
        train_loop(net, train_loader, valid_loader, test_loader)
    else:
        # Create a fresh instance of the RNN that will hold
        # the loaded pretrained weights
        net = SentimentAnalyzer(config['vocab'], net_config.hidden_dim,
                                net_config.layers, net_config.dropout,
                                net_config.bidirectional).to(device)
        # Load pretrained model parameters
        net.load_state_dict(torch.load(net_config.pretrained_loc))

        # Evaluate the network's performance
        with torch.no_grad():
            test_loss, test_acc = evaluate(net, test_loader, LOSS_FUNC)
        print(f'Test. Loss: {test_loss:.3f} | Test. Acc: {test_acc*100:.2f}%')
model = Seq2Seq.Seq2Seq(encoder, decoder, device).to(device)
model_name = "S2S.pt"

print("Initialize weights")
model.apply(initialize_weights)

optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=target_pad_idx)

best_val_loss = float('inf')
writer = SummaryWriter(log_dir)

for epoch in range(num_epochs):
    s = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, clip=1)
    val_loss = evaluate(model, val_loader, criterion)
    t = time.time()

    epoch_min, epoch_sec = epoch_time(s, t)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(ckpt_dir, model_name))

    print("Epoch : %02d | Elapsed Time : %02d min %02d sec" %
          (epoch + 1, epoch_min, epoch_sec))
    print("\t Train Loss : %.3f | Train PPL : %7.3f" %
          (train_loss, math.exp(train_loss)))
    print("\t Val Loss : %.3f | Val PPL : %7.3f" %
          (val_loss, math.exp(val_loss)))
model = Seq2Seq(enc, dec).to(device)

optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)

count_parameters(model)

N_EPOCHS = 90
CLIP = 0.1
best_metric = 0

for epoch in range(N_EPOCHS):
    start_time = time.time()

    model.train()
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    test_loss = evaluate(model, test_iterator, criterion)
    metrics_test = calculate_avg_rouge_f(test_data, SRC, TRG, model, device)
    print(f'\tMetrics_test: {metrics_test}')

    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if metrics_test > best_metric:
        print('New best score!')
        best_metric = metrics_test
        torch.save(model.state_dict(), 'models/best-model.pt')

    print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss:.3f} | Test Loss: {test_loss:.3f}')
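# count_parameters is called above but not defined in this snippet. A minimal sketch,
# assuming it simply reports the number of trainable parameters (the real helper may
# format its output differently):
def count_parameters(model):
    n_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'The model has {n_params:,} trainable parameters')
    return n_params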
def main(args):
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train):
        print("Output directory ({}) already exists and is not empty.".format(
            args.output_dir))
        print("Do you want to overwrite it? Type y or n")
        if input() == 'n':
            return
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # gpu ready
    gpu_ids = [int(device_id) for device_id in args.gpu_ids.split()]
    args.device, args.n_gpu = get_device(gpu_ids[0])

    # PTM ready
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_file,
                                          num_labels=2,
                                          cache_dir=None)
    tokenizer = tokenizer_class.from_pretrained(
        args.vocab_file, do_lower_case=args.do_lower_case, cache_dir=None)

    # train and eval to get the checkpoint
    if args.do_train:
        train_dataset = load_data(args, tokenizer, 'train')
        train_dataloader = random_dataloader(train_dataset,
                                             args.train_batch_size)
        dev_dataset = load_data(args, tokenizer, 'dev')
        dev_dataloader = sequential_dataloader(dev_dataset, args.dev_batch_size)

        # model ready
        model = model_class.from_pretrained(args.model_file,
                                            from_tf=False,
                                            config=config,
                                            cache_dir=None)
        model.to(args.device)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        # optimizer ready
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": args.weight_decay,
            },
            {
                "params": [
                    p for n, p in model.named_parameters()
                    if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters,
                          lr=args.learning_rate,
                          eps=args.adam_epsilon)
        t_total = len(
            train_dataloader
        ) // args.gradient_accumulation_steps * args.num_train_epochs
        scheduler = get_linear_schedule_with_warmup(
            optimizer,
            num_warmup_steps=args.warmup_steps,
            num_training_steps=t_total)

        train(args, train_dataloader, dev_dataloader, model, optimizer,
              scheduler, tokenizer)

    # Predict with the saved checkpoint
    tokenizer = tokenizer_class.from_pretrained(
        args.output_dir, do_lower_case=args.do_lower_case)
    test_dataset = load_data(args, tokenizer, 'test')
    test_dataloader = sequential_dataloader(test_dataset, args.test_batch_size)
    model = model_class.from_pretrained(args.output_dir)
    model.to(args.device)

    eval_loss, eval_metric = evaluate(args, model, test_dataloader,
                                      do_predict=True)
    for key, val in eval_metric.items():
        print('the test dataset {} is {}'.format(key, val))
model.to(device)

optimizer = optim.Adam(model.parameters())

n_epochs = 5
clip = 1
best_test_loss = float('inf')

for epoch in range(n_epochs):
    print("EPOCH ", epoch, " START #########################################")
    start_time = time.time()

    train_loss = train(model, train_dl, optimizer, clip)
    test_loss, f1 = evaluate(model, test_dl, vectorizer)

    end_time = time.time()
    epoch_time = end_time - start_time

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model, f'./checkpoints/ner_lstm_epoch_{epoch}.pt')
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
                'test_loss': test_loss