parser = argparse.ArgumentParser(description='Chinese NER PyTorch')
parser.add_argument('--doing', type=str, required=True, help='choose an action: train, test, predict')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, Albert, Xlnet, Gpt-2')
args = parser.parse_args()

if __name__ == '__main__':
    model_name = args.model
    x = import_module('Models.' + model_name)
    config = x.Config()
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure reproducible results

    start_time = time.time()
    print("Loading data...")
    train_dataset = built_train_dataset(config)
    dev_dataset = built_dev_dataset(config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    if args.doing == 'train':
        model = x.Model(config).to(config.device)
        train(config, model, train_dataset, dev_dataset)
    if args.doing == 'predict':
        model = x.Model(config).to(config.device)
        predict(config, model)
import torch.nn as nn
import os

os.environ["CUDA_VISIBLE_DEVICES"] = "1,2,3,5"

parser = argparse.ArgumentParser(description='Chinese Text Classification')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, ERNIE')
args = parser.parse_args()

if __name__ == '__main__':
    dataset = 'THUCNews'  # dataset
    model_name = args.model  # bert
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure reproducible results

    model = x.Model(config)
    # init_network(model)
    model = nn.DataParallel(model, device_ids=[0, 1, 2, 3])
    model.to(config.device)
    model.load_state_dict(torch.load('THUCNews/saved_dict/ERNIE0.4gauss.ckpt'))
    train(config, model)
model_name = "%s%s.pt" % (CONFIG["SAVE_DIR"], CONFIG["model"]) # initialize the early_stopping object early_stopping = EarlyStopping(patience=20, verbose=True, path=model_name) lr = CONFIG["lr"] for epoch in range(1, CONFIG["epochs"] + 1): print(f'Epoch: {epoch:02}') epoch_start_time = time.time() print('\tTrain: ', end='') train_loss, train_acc = train( train_dl, model, criterion, list(range(CONFIG["gpus"])), DEVICE, optimizer, scheduler=scheduler, seq2seq=not classify, pad_idx=vocab[CONFIG["pad_token"]] if not classify else -1) history['train_loss'].append(train_loss) history['train_acc'].append(train_acc) print('\n\tValid: ', end='') with torch.no_grad(): valid_loss, valid_acc = valid( valid_dl, model, criterion, DEVICE, temperature=CONFIG["temp"],
train_data2, dev_data2 = BalanceFold(train_news, 20)
train_data3, dev_data3 = BalanceFold(train_emotion, 10)
print("dev size: ", len(dev_data1), len(dev_data2), len(dev_data3))

train_iter1 = build_iterator2(train_data1, config)
dev_iter1 = build_iterator2(dev_data1, config)
train_iter2 = build_iterator2(train_data2, config)
dev_iter2 = build_iterator2(dev_data2, config)
train_iter3 = build_iterator2(train_data3, config)
dev_iter3 = build_iterator2(dev_data3, config)
print(len(dev_iter1), len(dev_iter2), len(dev_iter3))
print("len(train_features_1)=", len(train_iter1))
print("len(train_features_2)=", len(train_iter2))
print("len(train_features_3)=", len(train_iter3))

# Build a shuffled task schedule: one entry per batch, tagging which task it belongs to.
a = []
for i in range(len(train_iter1)):
    a.append(1)
for i in range(len(train_iter2)):
    a.append(2)
for i in range(len(train_iter3)):
    a.append(3)
random.seed(1234)
# random.seed(1)
random.shuffle(a)
print("len(a)=", len(a))

time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)

config.save_path = '../user_data/Roberta-wwm-36/bert.ckpt'
# model.load_state_dict(torch.load(config.save_path))
train(config, model, a, train_iter1, dev_iter1, train_iter2, dev_iter2,
      train_iter3, dev_iter3)
model = Seq2Seq(enc, dec).to(device)
optimizer = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss(ignore_index=trg_pad_idx)
count_parameters(model)

N_EPOCHS = 90
CLIP = 0.1
best_metric = 0

for epoch in range(N_EPOCHS):
    start_time = time.time()
    model.train()
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    test_loss = evaluate(model, test_iterator, criterion)
    metrics_test = calculate_avg_rouge_f(test_data, SRC, TRG, model, device)
    print(f'\tMetrics_test: {metrics_test}')
    end_time = time.time()
    epoch_mins, epoch_secs = epoch_time(start_time, end_time)

    if metrics_test > best_metric:
        print('New best score!')
        best_metric = metrics_test
        torch.save(model.state_dict(), 'models/best-model.pt')

    print(f'Epoch: {epoch + 1:02} | Time: {epoch_mins}m {epoch_secs}s')
    device=device)
model.to(device)
optimizer = optim.Adam(model.parameters())

n_epochs = 5
clip = 1
best_test_loss = float('inf')

for epoch in range(n_epochs):
    print("EPOCH ", epoch, " START #########################################")
    start_time = time.time()
    train_loss = train(model, train_dl, optimizer, clip)
    test_loss, f1 = evaluate(model, test_dl, vectorizer)
    end_time = time.time()
    epoch_time = end_time - start_time

    if test_loss < best_test_loss:
        best_test_loss = test_loss
        torch.save(model, f'./checkpoints/ner_lstm_epoch_{epoch}.pt')
        torch.save(
            {
                'epoch': epoch,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'train_loss': train_loss,
val_dataset = dataset[num_train:num_train + num_val]
test_dataset = dataset[num_train + num_val:]

train_loader = DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=args.batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=args.batch_size, shuffle=False)

for num_layers, hidden in product(layers, hiddens):
    print(f'--\n{dataset_name} - {Net.__name__} - {num_layers} - {hidden}')

    model = Net(dataset, num_layers, hidden).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    stats_list = []
    for epoch in range(1, args.epochs + 1):
        loss, stats = train(model, optimizer, train_loader)
        val_acc, val_time = eval_acc(model, val_loader)
        test_acc, test_time = eval_acc(model, test_loader)
        if epoch >= args.warmup_profile:
            stats_list.append(stats)

    stats_summary = get_stats_summary(stats_list)
    print(stats_summary)
def main():
    dataset = args.dataset
    shadow_path = "./models/shadow_" + str(dataset) + ".pth"
    target_path = "./models/target_" + str(dataset) + ".pth"
    attack_path = "./models/attack_" + str(dataset) + ".pth"

    # CIFAR has RGB images (3 channels) and MNIST is grayscale (1 channel)
    if dataset == "cifar":
        input_size = 3
    elif dataset == "mnist":
        input_size = 1

    n_epochs = args.epoch
    attack_epochs = args.attack_epoch
    batch_size = args.batch_size

    # These are the main dataloaders over the splits of the total dataset
    shadow_train_loader = dataloader(dataset=dataset, batch_size_train=batch_size,
                                     batch_size_test=1000, split_dataset="shadow_train")
    shadow_out_loader = dataloader(dataset=dataset, batch_size_train=batch_size,
                                   batch_size_test=1000, split_dataset="shadow_out")
    target_train_loader = dataloader(dataset=dataset, batch_size_train=batch_size,
                                     batch_size_test=1000, split_dataset="target_train")
    target_out_loader = dataloader(dataset=dataset, batch_size_train=batch_size,
                                   batch_size_test=1000, split_dataset="target_out")
    testloader = dataloader(dataset=dataset, batch_size_train=batch_size,
                            batch_size_test=1000, split_dataset="test")

    # In attack I the adversary knows the structure of the target net, so it can train a
    # shadow model with the same architecture to mimic the behaviour of the target.
    # The two must be separate instances so that training one does not modify the other.
    target_net = ConvNet(input_size=input_size)
    shadow_net = ConvNet(input_size=input_size)

    # Simple initialization of model weights
    target_loss = shadow_loss = nn.CrossEntropyLoss()
    target_optim = optim.Adam(target_net.parameters(), lr=0.001)
    shadow_optim = optim.Adam(shadow_net.parameters(), lr=0.001)

    # The attack net is a binary classifier that decides membership
    attack_net = MlleaksMLP()
    # Binary cross entropy as loss
    attack_loss = nn.BCELoss()
    attack_optim = optim.Adam(attack_net.parameters(), lr=0.001)

    # Three training loops follow: first the shadow model, then the target model,
    # and finally the attack model.
    if os.path.exists(shadow_path):
        print("Load shadow model")
        shadow_net.load_state_dict(torch.load(shadow_path))

    # Training of the shadow model on the shadow training set
    if not args.only_eval:
        print("start training shadow model: ")
        for epoch in range(n_epochs):
            loss_train_shadow = train(shadow_net, shadow_train_loader, shadow_loss,
                                      shadow_optim, verbose=False)
            # Evaluate the model every five epochs
            if (epoch + 1) % 5 == 0:
                accuracy_train_shadow = eval_model(shadow_net, shadow_train_loader, report=False)
                accuracy_test_shadow = eval_model(shadow_net, testloader, report=True)
                print("Shadow model: epoch[%d/%d] Train loss: %.5f training set accuracy: %.5f test set accuracy: %.5f"
                      % (epoch + 1, n_epochs, loss_train_shadow, accuracy_train_shadow, accuracy_test_shadow))
            if args.save_new_models:
                if not os.path.exists("./models"):
                    os.mkdir("./models")  # Create the models folder if it doesn't exist
                # Save the model after each epoch if the argument is set
                torch.save(shadow_net.state_dict(), "./models/shadow_" + str(dataset) + ".pth")

    if os.path.exists(target_path):
        print("Load target model")
        target_net.load_state_dict(torch.load(target_path))

    # Training of the target model on the target training set
    if not args.only_eval:
        print("start training target model: ")
        for epoch in range(n_epochs):
            loss_train_target = train(target_net, target_train_loader, target_loss,
                                      target_optim, verbose=False)
            # Evaluate the model every five epochs
            if (epoch + 1) % 5 == 0:
                accuracy_train_target = eval_model(target_net, target_train_loader, report=False)
                accuracy_test_target = eval_model(target_net, testloader, report=True)
                print("Target model: epoch[%d/%d] Train loss: %.5f training set accuracy: %.5f test set accuracy: %.5f"
                      % (epoch + 1, n_epochs, loss_train_target, accuracy_train_target, accuracy_test_target))
            if args.save_new_models:
                # Save the model after each epoch
                if not os.path.exists("./models"):
                    os.mkdir("./models")  # Create the models folder if it doesn't exist
                torch.save(target_net.state_dict(), target_path)

    if os.path.exists(attack_path):
        print("Load attack model")
        attack_net.load_state_dict(torch.load(attack_path))

    # Training of the attack model on the shadow net's posteriors over the shadow train and out datasets.
    if not args.only_eval:
        print("start training attacker model")
        for epoch in range(attack_epochs):
            loss_attack = train_attacker(attack_net, shadow_net, shadow_train_loader, shadow_out_loader,
                                         attack_optim, attack_loss, num_posterior=3, verbose=False)
            # The model is evaluated using the target net's posteriors to classify membership
            # of data in target train/out.
            if (epoch + 1) % 1 == 0:
                max_accuracy = eval_attacker(attack_net, target_net, target_train_loader,
                                             target_out_loader, num_posterior=3)
                print("Attack model: epoch[%d/%d] Train loss: %.5f Accuracy on target set: %.5f"
                      % (epoch + 1, attack_epochs, loss_attack, max_accuracy))
            if args.save_new_models:
                if not os.path.exists("./models"):
                    os.mkdir("./models")  # Create the models folder if it doesn't exist
                # Save the model after each epoch
                torch.save(attack_net.state_dict(), attack_path)

    # Only evaluate pretrained, loaded models when the only_eval argument is True
    if args.only_eval:
        print("Classification Report Shadow Net:")
        eval_model(shadow_net, testloader, report=True)
        print("Classification Report Target Net:")
        eval_model(target_net, testloader, report=True)
        print("Report of Attack Net")
        eval_attacker(attack_net, target_net, target_train_loader, target_out_loader, num_posterior=3)
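# `train_attacker` above is a project function that is not shown here. The sketch below
# illustrates, under assumptions about its interface, the usual ML-Leaks-style training step
# it likely performs: the shadow model's top-k softmax posteriors are the attack features,
# labelled 1 for shadow-train ("member") samples and 0 for shadow-out ("non-member") samples,
# and the attack MLP is fit with binary cross entropy. Names and shapes are illustrative only.
import torch
import torch.nn.functional as F


def train_attacker_sketch(attack_net, shadow_net, member_loader, nonmember_loader,
                          optimizer, criterion, num_posterior=3, device="cpu"):
    attack_net.train()
    shadow_net.eval()
    total_loss = 0.0
    for (x_in, _), (x_out, _) in zip(member_loader, nonmember_loader):
        x_in, x_out = x_in.to(device), x_out.to(device)
        with torch.no_grad():
            # Top-k posteriors of the shadow model are the membership features.
            post_in = F.softmax(shadow_net(x_in), dim=1).topk(num_posterior, dim=1).values
            post_out = F.softmax(shadow_net(x_out), dim=1).topk(num_posterior, dim=1).values
        features = torch.cat([post_in, post_out], dim=0)
        labels = torch.cat([torch.ones(len(post_in)), torch.zeros(len(post_out))]).to(device)

        optimizer.zero_grad()
        preds = attack_net(features).view(-1)  # sigmoid outputs in [0, 1]
        loss = criterion(preds, labels)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / max(len(member_loader), 1)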
x = import_module('models.' + model_name)
# configuration parameters
config = x.Config(dataset)

# Fix the following seeds so that every run gives the same result
np.random.seed(1)  # seed the CPU random number generator
torch.manual_seed(1)
# set the random seed for all GPUs
torch.cuda.manual_seed_all(1)
# With this flag set to True, cuDNN returns the same deterministic (default) convolution algorithm each time
torch.backends.cudnn.deterministic = True  # ensure reproducible results

start_time = time.time()
print("Loading data...")
OCNLI_train, OCNLI_dev, OCEMOTION_train, OCEMOTION_dev, TNEWS_train, TNEWS_dev = build_dataset(
    config, mode='train')
OCNLI_train_iter = build_iterator(OCNLI_train, config)
OCEMOTION_train_iter = build_iterator(OCEMOTION_train, config)
TNEWS_train_iter = build_iterator(TNEWS_train, config)
OCNLI_dev_iter = build_iterator(OCNLI_dev, config)
OCEMOTION_dev_iter = build_iterator(OCEMOTION_dev, config)
TNEWS_dev_iter = build_iterator(TNEWS_dev, config)
time_dif = get_time_dif(start_time)

# train
model = x.Model(config).to(config.device)
train(config, model, OCNLI_train_iter, OCNLI_dev_iter, OCEMOTION_train_iter,
      OCEMOTION_dev_iter, TNEWS_train_iter, TNEWS_dev_iter)
model = DSN(in_dim=64, hid_dim=args.hidden_dim, num_layers=args.num_layers,
            cell=args.rnn_cell)
model.apply(weights_init)
print("Model size: {:.5f}M".format(
    sum(p.numel() for p in model.parameters()) / 1000000.0))
print(" ========== \nArgs:{} \n========== ".format(args))

args.train_keys = split['train_keys']
args.test_keys = split['test_keys']

if args.train_model == 'sup':
    print("========Supervised Learning========")
else:
    args.use_reward = True
    print("========Unsupervised Learning========")

args.demo_h5 = osp.join(
    args.save_path,
    'h5_res' + args.reward_type + str(args.lr) + args.train_model)

model = train(args, model, dataset)

# Testing
Fscore, Precision, Recall = evaluate(args, model, dataset)

# save model
save_model_epoch(args, model, args.max_epoch)
batch_size = 256 hidden_size = 128 num_layers = 1 dropout = 0 testnum = 500 # interval is sample interval between last input and first output. interval = 0 epoch = 100 device = 'cuda' # Generate sin dataset for training and testing. dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)]) x_train, y_train, x_test, y_test, normalizer = generate_data( dataset, 'minmax', input_length, output_length, testnum, interval) # Build, train and predict. model = GRU(1, hidden_size, num_layers, 1, dropout) optimizer = opt.Adam(model.parameters()) loss = nn.MSELoss() batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch, batch_size, optimizer, loss, device) y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer, batch_size) # Draw result plt.plot(y_predict, label='prediction') plt.plot(y_real, label='real') plt.legend() plt.show()
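# `generate_data` above is a project helper whose body is not shown. A rough sketch of what
# such a helper typically does is below: min-max normalize the series, slide a window so each
# sample pairs `input_length` past points with `output_length` future points (offset by
# `interval`), and hold out the last `testnum` windows for testing. The function name, return
# layout, and normalizer format here are assumptions, not the project's actual implementation.
import numpy as np


def generate_data_sketch(series, method, input_length, output_length, testnum, interval):
    assert method == 'minmax'
    series = np.asarray(series, dtype=np.float64)
    lo, hi = series.min(), series.max()
    normalizer = (lo, hi)                    # keep the stats so predictions can be un-scaled
    scaled = (series - lo) / (hi - lo)

    xs, ys = [], []
    n_windows = len(scaled) - input_length - interval - output_length + 1
    for i in range(n_windows):
        xs.append(scaled[i:i + input_length])
        ys.append(scaled[i + input_length + interval:
                         i + input_length + interval + output_length])
    x, y = np.array(xs), np.array(ys)

    # The last `testnum` windows form the test split.
    return x[:-testnum], y[:-testnum], x[-testnum:], y[-testnum:], normalizer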
label_vocab_dim = len(w2i_fre_train)
m = model.enc_dec_attn(args.enc_hid, args.dec_hid, args.emb_dim, args.drop_prob,
                       device, inp_vocab_dim, label_vocab_dim)
# print(m)
# print(f'The model has {model.count_parameters(m):,} trainable parameters')

data_loader_train = data_loader.get_data_loader(train, w2i_eng_train, w2i_fre_train,
                                                args.batch_size, eng_lm, fre_lm)
data_loader_val = data_loader.get_data_loader(val, w2i_eng_val, w2i_fre_val,
                                              args.batch_size, eng_lm, fre_lm)
data_loader_test = data_loader.get_data_loader(test, w2i_eng_test, w2i_fre_test,
                                               args.batch_size, eng_lm, fre_lm)

loss = train_eval.train(m, args.epochs, args.batch_size, data_loader_train,
                        data_loader_val, w2i_eng_train, i2w_eng_val, w2i_fre_train,
                        i2w_fre_val, device)

criterion = nn.CrossEntropyLoss().to(device)
loss = train_eval.val_test(m, data_loader_test, criterion, w2i_eng_train,
                           i2w_eng_test, w2i_fre_train, i2w_fre_test, device, 'test')
print('Test Loss: {}'.format(loss))
import sys

from train_eval import train
from utils import get_sent, get_input, get_data_loader, printm

data_path = './data'
bert_path = './bert'
# bert_path = '/content/bert-utils/bert/multilingual_L-12_H-768_A-12'

if __name__ == '__main__':
    config = Config(data_path, bert_path)
    if config.device == 'cuda':
        memoryUtil = printm(config)
        if memoryUtil > 0.2:
            # Abort when too much GPU memory is already in use.
            print('MemoryUtil FULL')
            sys.exit()

    train_sent, dev_sent, test_sent = get_sent(data_path)
    train_dataloader = get_data_loader(config, train_sent)
    dev_dataloader = get_data_loader(config, dev_sent)
    test_dataloader = get_data_loader(config, test_sent)

    model = config.model
    train(config, model, train_dataloader, dev_dataloader, test_dataloader)
train_loader = DataLoader(dataset=train_datasets, batch_size=128, shuffle=True,
                          num_workers=0)
dev_loader = DataLoader(dataset=dev_datasets, batch_size=512, shuffle=False,
                        num_workers=0)

model = Model(config, embedding_pretraineds, device)  # define the model
logger.info('{} Fold'.format(i + 1))
logger.info(('*' * 50))
logger.debug(model)

train(config, model, train_loader, dev_loader)

test_loader = DataLoader(dataset=test_datasets, batch_size=512, shuffle=False,
                         num_workers=0)
preds_age = test(config, model, test_loader)
index_col = config.getint('model_parameters_settings', 'test_index')

# generate the submission data
preds_age = preds_age + 1
preds_gender = list([4 for _ in range(len(preds_age))])
os.environ["CUDA_VISIBLE_DEVICES"] = "0" args = parser.parse_args() if __name__ == '__main__': dataset = 'text_emotion' # 数据集 model_name = args.model # bert x = import_module('models.' + model_name) config = x.Config(dataset) np.random.seed(1) torch.manual_seed(1) torch.cuda.manual_seed_all(1) torch.backends.cudnn.deterministic = True # 保证每次结果一样 if torch.cuda.device_count() > 0: torch.cuda.manual_seed_all(1) start_time = time.time() print("Loading data...") train_iter, dev_iter = build_dataset(config, args) #train_iter = build_iterator(train_data, config) #dev_iter = build_iterator(dev_data, config) #test_iter = build_iterator(test_data, config) time_dif = get_time_dif(start_time) print("Time usage:", time_dif) model = x.Model(config).to(config.device) train(config, model, train_iter, dev_iter, None)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model_type", default='bert', type=str,
                        help="Model type selected in the list: " + ", ".join(MODEL_CLASSES.keys()))
    parser.add_argument("--model_name_or_path", default='bert-base-uncased', type=str,
                        help="Path to pre-trained model or shortcut name selected in the list: " + ", ".join(ALL_MODELS))
    parser.add_argument("--output_dir", default='../output_mc', type=str,
                        help="The output directory where the model checkpoints and predictions will be written.")
    parser.add_argument("--raw_data_dir", default='../data_mc', type=str)
    parser.add_argument("--config_name", default="", type=str,
                        help="Pretrained config name or path if not the same as model_name")
    parser.add_argument("--tokenizer_name", default="", type=str,
                        help="Pretrained tokenizer name or path if not the same as model_name")
    parser.add_argument("--max_seq_length", default=384, type=int,
                        help="The maximum total input sequence length after WordPiece tokenization. Sequences "
                             "longer than this will be truncated, and sequences shorter than this will be padded.")
    parser.add_argument("--task_name", default='DREAM')
    parser.add_argument("--do_train", action='store_true',
                        help="Whether to run training.")
    parser.add_argument("--do_eval", action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument("--do_test", action='store_true',
                        help="Whether to run test on the test set")
    parser.add_argument("--evaluate_during_training", action='store_true',
                        help="Run evaluation during training at each logging step.")
    parser.add_argument("--do_lower_case", action='store_true',
                        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--per_gpu_train_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for training.")
    parser.add_argument("--per_gpu_eval_batch_size", default=8, type=int,
                        help="Batch size per GPU/CPU for evaluation.")
    parser.add_argument("--learning_rate", default=3e-5, type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--gradient_accumulation_steps", type=int, default=1,
                        help="Number of update steps to accumulate before performing a backward/update pass.")
    parser.add_argument("--weight_decay", default=0.0, type=float,
                        help="Weight decay if we apply some.")
    parser.add_argument("--adam_epsilon", default=1e-8, type=float,
                        help="Epsilon for Adam optimizer.")
    parser.add_argument("--max_grad_norm", default=1.0, type=float,
                        help="Max gradient norm.")
    parser.add_argument("--num_train_epochs", default=2.0, type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument("--max_steps", default=-1, type=int,
                        help="If > 0: set total number of training steps to perform. Overrides num_train_epochs.")
    parser.add_argument("--warmup_proportion", default=0.1, type=float,
                        help="Linear warmup over warmup_steps.")
    parser.add_argument("--time_stamp", default='', type=str)
    parser.add_argument("--verbose_logging", action='store_true',
                        help="If true, all of the warnings related to data processing will be printed. "
                             "A number of warnings are expected for a normal SQuAD evaluation.")
    parser.add_argument("--eval_all_checkpoints", action='store_true',
                        help="Evaluate all checkpoints starting with the same prefix as model_name and ending with the step number")
    parser.add_argument("--no_cuda", action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--overwrite_output_dir", action='store_true',
                        help="Overwrite the content of the output directory")
    parser.add_argument("--overwrite_cache", action='store_true',
                        help="Overwrite the cached training and evaluation sets")
    parser.add_argument("--seed", type=int, default=42,
                        help="random seed for initialization")
    parser.add_argument("--local_rank", type=int, default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument("--fp16", action='store_true',
                        help="Whether to use 16-bit (mixed) precision (through NVIDIA apex) instead of 32-bit")
    parser.add_argument("--fp16_opt_level", type=str, default='O1',
                        help="For fp16: Apex AMP optimization level selected in ['O0', 'O1', 'O2', and 'O3']. "
                             "See details at https://nvidia.github.io/apex/amp.html")
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
            and args.do_train and not args.overwrite_output_dir:
        logger.info("Output directory ({}) already exists and is not empty. "
                    "Use --overwrite_output_dir to overcome.".format(args.output_dir))

    # Setup CUDA, GPU & distributed training
    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        args.n_gpu = torch.cuda.device_count()
    else:
        # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
        torch.cuda.set_device(args.local_rank)
        device = torch.device("cuda", args.local_rank)
        torch.distributed.init_process_group(backend='nccl')
        args.n_gpu = 1
    args.device = device

    # Set seed
    set_seed(args)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path)
    config.output_hidden_states = True
    config.num_options = int(MULTIPLE_CHOICE_TASKS_NUM_LABELS[args.task_name.lower()])
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)
    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config)
    model.MLP.copy_from_bert(model.bert)
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)

    if args.fp16:
        try:
            import apex
            apex.amp.register_half_function(torch, 'einsum')
        except ImportError:
            raise ImportError(
                "Please install apex from https://www.github.com/nvidia/apex to use fp16 training."
            )

    if args.do_train:
        logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)  # Reduce logging
        train_dataset = load_and_cache_examples(args, task=args.task_name,
                                                tokenizer=tokenizer, evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

    time_stamp = args.time_stamp

    # Evaluation
    # We do not use the dev set
    if args.do_eval and args.local_rank in [-1, 0]:
        checkpoints = [args.output_dir]
        if args.eval_all_checkpoints:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
            logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
            logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)  # Reduce logging
            logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
        checkpoints = [checkpoint for checkpoint in checkpoints if time_stamp in checkpoint]
        logger.info("Evaluate the following checkpoints for validation: %s", checkpoints)

        best_ckpt = 0
        best_acc = 0
        for checkpoint in checkpoints:
            global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split('/')[-1] if checkpoint.find('checkpoint') != -1 else ""
            logger.info("Load the model: %s", checkpoint)
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            result = evaluate(args, args.task_name, model, tokenizer, prefix=prefix)
            if result[0]['eval_acc'] > best_acc:
                best_ckpt = checkpoint
                best_acc = result[0]['eval_acc']

    if args.do_test and args.local_rank in [-1, 0]:
        try:
            checkpoints = [best_ckpt]
        except:
            checkpoints = list(
                os.path.dirname(c) for c in sorted(
                    glob.glob(args.output_dir + '/**/' + WEIGHTS_NAME, recursive=True)))
            checkpoints = [checkpoint for checkpoint in checkpoints if time_stamp in checkpoint]
        logging.getLogger("transformers.modeling_utils").setLevel(logging.WARN)  # Reduce logging
        logging.getLogger("transformers.configuration_utils").setLevel(logging.WARN)  # Reduce logging
        logging.getLogger("transformers.tokenization_utils").setLevel(logging.ERROR)  # Reduce logging
        logger.info("Evaluate the following checkpoints for final testing: %s", checkpoints)

        for checkpoint in checkpoints:
            global_step = checkpoint.split('-')[-1] if len(checkpoints) > 1 else ""
            prefix = checkpoint.split('/')[-1] if checkpoint.find('checkpoint') != -1 else ""
            logger.info("Load the model: %s", checkpoint)
            model = model_class.from_pretrained(checkpoint)
            model.to(args.device)
            task_string = ['', '-Add1OtherTruth2Opt', '-Add2OtherTruth2Opt',
                           '-Add1PasSent2Opt', '-Add1NER2Pass']
            task_string = [args.task_name + item for item in task_string]
            result = evaluate(args, task_string, model, tokenizer, prefix=prefix, test=True)
    'class_label'].apply(lambda x: 0)

# merge the datasets
data_merge = pd.merge(select_from_data_train, select_from_data_neg, how="outer")
# shuffle the data
data_merge = data_merge.sample(frac=1).reset_index(drop=True)
# split into train/dev sets
train_set = data_merge.loc[:int(data_merge.shape[0] * 0.8)]
dev_set = data_merge.loc[int(data_merge.shape[0] * 0.8):]
# save the data to the paths the model config expects
train_set.to_csv('THUCNews/data/train.csv', index=False, header=False)
dev_set.to_csv('THUCNews/data/dev.csv', index=False, header=False)

# train_data, dev_data, test_data = build_dataset(config)
train_data, dev_data = build_dataset(config)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
# test_iter = build_iterator(test_data, config)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)

# train
model = x.Model(config).to(config.device)
# train(config, model, train_iter, dev_iter, test_iter)
train(config, model, train_iter, dev_iter,
      save_path="THUCNews/saved_dict/" + model_name + '.ckpt')
def main(config):
    if not os.path.exists(config.model_dir):
        os.makedirs(config.model_dir)
    if not os.path.exists(config.log_dir):
        os.makedirs(config.log_dir)

    print("\t \t \t the model name is {}".format(config.model_name))
    device, n_gpu = get_device()

    torch.manual_seed(config.seed)
    np.random.seed(config.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(config.seed)
        torch.backends.cudnn.deterministic = True  # make cuDNN use deterministic algorithms so every run gives the same result

    """ SST-2 data preparation """
    text_field = data.Field(tokenize='spacy', lower=True, include_lengths=True,
                            fix_length=config.sequence_length)
    label_field = data.LabelField(dtype=torch.long)
    train_iterator, dev_iterator, test_iterator = load_sst2(
        config.data_path, text_field, label_field, config.batch_size, device,
        config.glove_word_file)

    """ word vector preparation """
    pretrained_embeddings = text_field.vocab.vectors

    model_file = config.model_dir + 'model1.pt'

    """ model preparation """
    if config.model_name == "TextCNN":
        filter_sizes = [int(val) for val in config.filter_sizes.split()]
        model = TextCNN.TextCNN(config.glove_word_dim, config.filter_num, filter_sizes,
                                config.output_dim, config.dropout, pretrained_embeddings)
    elif config.model_name == "TextRNN":
        model = TextRNN.TextRNN(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == "LSTMATT":
        model = LSTMATT.LSTMATT(config.glove_word_dim, config.output_dim,
                                config.hidden_size, config.num_layers,
                                config.bidirectional, config.dropout,
                                pretrained_embeddings)
    elif config.model_name == 'TextRCNN':
        model = TextRCNN.TextRCNN(config.glove_word_dim, config.output_dim,
                                  config.hidden_size, config.num_layers,
                                  config.bidirectional, config.dropout,
                                  pretrained_embeddings)

    optimizer = optim.Adam(model.parameters())
    criterion = nn.CrossEntropyLoss()
    model = model.to(device)
    criterion = criterion.to(device)

    if config.do_train:
        train(config.epoch_num, model, train_iterator, dev_iterator, optimizer,
              criterion, ['0', '1'], model_file, config.log_dir, config.print_step,
              'word')

    model.load_state_dict(torch.load(model_file))
    test_loss, test_acc, test_report = evaluate(model, test_iterator, criterion,
                                                ['0', '1'], 'word')
    print("-------------- Test -------------")
    print("\t Loss: {} | Acc: {} | Micro avg F1: {} | Macro avg F1: {} | Weighted avg F1: {}"
          .format(test_loss, test_acc, test_report['micro avg']['f1-score'],
                  test_report['macro avg']['f1-score'],
                  test_report['weighted avg']['f1-score']))
embedding = 'random'
if args.embedding == 'random':
    embedding = 'random'
model_name = args.model  # 'TextRCNN'  # TextCNN, TextRNN, FastText, TextRCNN, TextRNN_Att, DPCNN, Transformer

from utils import build_dataset, build_iterator, get_time_dif

x = import_module('models.' + model_name)
config = x.Config(dataset, embedding)
np.random.seed(1)
torch.manual_seed(1)
torch.cuda.manual_seed_all(1)
torch.backends.cudnn.deterministic = True  # ensure reproducible results

start_time = time.time()
print("Loading data...")
vocab, train_data, dev_data, test_data, id_to_word = build_dataset(config, args.word)
train_iter = build_iterator(train_data, config)
dev_iter = build_iterator(dev_data, config)
test_iter = build_iterator(test_data, config)
time_dif = get_time_dif(start_time)
print("Time usage:", time_dif)

# train
config.n_vocab = len(vocab)
model = x.Model(config).to(config.device)
if model_name != 'Transformer':
    init_network(model)
print(model.parameters)
train(config, model, train_iter, dev_iter, test_iter, id_to_word)
def main(args):
    if (os.path.exists(args.output_dir) and os.listdir(args.output_dir)
            and args.do_train):
        print("Output directory ({}) already exists and is not empty.".format(args.output_dir))
        print("Do you want to overwrite it? Type y or n")
        if input() == 'n':
            return
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)

    # gpu ready
    gpu_ids = [int(device_id) for device_id in args.gpu_ids.split()]
    args.device, args.n_gpu = get_device(gpu_ids[0])

    # PTM ready
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(args.config_file, num_labels=2, cache_dir=None)
    tokenizer = tokenizer_class.from_pretrained(args.vocab_file,
                                                do_lower_case=args.do_lower_case,
                                                cache_dir=None)

    # train and eval to get the checkpoint
    if args.do_train:
        train_dataset = load_data(args, tokenizer, 'train')
        train_dataloader = random_dataloader(train_dataset, args.train_batch_size)
        dev_dataset = load_data(args, tokenizer, 'dev')
        dev_dataloader = sequential_dataloader(dev_dataset, args.dev_batch_size)

        # model ready
        model = model_class.from_pretrained(args.model_file, from_tf=False,
                                            config=config, cache_dir=None)
        model.to(args.device)
        if args.n_gpu > 1:
            model = torch.nn.DataParallel(model, device_ids=gpu_ids)

        # optimizer ready
        no_decay = ["bias", "LayerNorm.weight"]
        optimizer_grouped_parameters = [
            {
                "params": [p for n, p in model.named_parameters()
                           if not any(nd in n for nd in no_decay)],
                "weight_decay": args.weight_decay,
            },
            {
                "params": [p for n, p in model.named_parameters()
                           if any(nd in n for nd in no_decay)],
                "weight_decay": 0.0,
            },
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=args.learning_rate,
                          eps=args.adam_epsilon)
        t_total = len(train_dataloader) // args.gradient_accumulation_steps * args.num_train_epochs
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_warmup_steps=args.warmup_steps,
                                                    num_training_steps=t_total)

        train(args, train_dataloader, dev_dataloader, model, optimizer, scheduler, tokenizer)

    # Predict with the saved checkpoint and report results
    tokenizer = tokenizer_class.from_pretrained(args.output_dir,
                                                do_lower_case=args.do_lower_case)
    test_dataset = load_data(args, tokenizer, 'test')
    test_dataloader = sequential_dataloader(test_dataset, args.test_batch_size)
    model = model_class.from_pretrained(args.output_dir)
    model.to(args.device)
    eval_loss, eval_metric = evaluate(args, model, test_dataloader, do_predict=True)
    for key, val in eval_metric.items():
        print('the test dataset {} is {}'.format(key, val))
    dropout=dropout)
model = Seq2Seq.Seq2Seq(encoder, decoder, device).to(device)
model_name = "S2S.pt"

print("Initialize weights")
model.apply(initialize_weights)

optimizer = optim.Adam(model.parameters(), lr=lr)
target_pad_idx = en_field.vocab.stoi[en_field.pad_token]
criterion = nn.CrossEntropyLoss(ignore_index=target_pad_idx)

best_val_loss = float('inf')
writer = SummaryWriter(log_dir)

for epoch in range(num_epochs):
    s = time.time()
    train_loss = train(model, train_loader, optimizer, criterion, clip=1)
    val_loss = evaluate(model, val_loader, criterion)
    t = time.time()
    epoch_min, epoch_sec = epoch_time(s, t)

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(ckpt_dir, model_name))

    print("Epoch : %02d | Elapsed Time : %02d min %02d sec" % (epoch + 1, epoch_min, epoch_sec))
    print("\t Train Loss : %.3f | Train PPL : %7.3f" % (train_loss, math.exp(train_loss)))
    print("\t Val Loss : %.3f | Val PPL : %7.3f" %
from utils_new import build_dataset, build_iterator, get_time_dif

parser = argparse.ArgumentParser(description='Chinese Text Classification')
parser.add_argument('--model', type=str, required=True, help='choose a model: Bert, ERNIE')
args = parser.parse_args()

if __name__ == '__main__':
    dataset = 'HITSZQA'  # dataset
    model_name = args.model  # bert
    x = import_module('models.' + model_name)
    config = x.Config(dataset)
    np.random.seed(1)
    torch.manual_seed(1)
    torch.cuda.manual_seed_all(1)
    torch.backends.cudnn.deterministic = True  # ensure reproducible results

    start_time = time.time()
    print("Loading data...")
    train_data, dev_data, test_data = build_dataset(config)
    train_iter = build_iterator(train_data, config)
    dev_iter = build_iterator(dev_data, config)
    test_iter = build_iterator(test_data, config)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    # train
    model = x.Model(config).to(config.device)
    train(config, model, train_iter, dev_iter, test_iter)
        self.train_len = 0
        self.test_len = 0
        self.valid_len = 0
        self.mode = "train"

        # Transformer hyperparameters
        self.dropout = 0.5
        self.max_len = 5000
        self.nhead = 2
        # data_path = "E:/study_series/2020_3/re_write_classify/data/"
        # data_path = "/mnt/data3/wuchunsheng/code/nlper/NLP_task/text_classification/my_classification_cnews/2020_3_30/text_classify/data/"


config = Config()
train_iter, valid_iter, test_iter, TEXT = generate_data(config)
# check_data(train_iter, TEXT)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# model = RNNModel(config, TEXT).to(device)
model = TransformerModel(config, TEXT).to(device)

train(config, model, train_iter, valid_iter, test_iter)
# res = test(config, model, TEXT, test_iter)  # evaluates on a whole batch
# print(res)
res = test_one_sentence(config, model, TEXT, test_iter)
print(res)