import argparse

# read_data, load_cls and save_cls come from this repo's utils module;
# train, test, task_name and model_name are defined elsewhere in this file.


def main():
    parser = argparse.ArgumentParser(description="-----[RNN-Attention-classifier]-----")
    parser.add_argument("--mode", default="train",
                        help="train: train (with test) a model / test: test saved models")
    parser.add_argument("--model", default="non-static",
                        help="available models: rand, static, non-static, multichannel")
    parser.add_argument("--dataset", default="data", help="available datasets: MR, TREC")
    # NOTE: store_true combined with default=True means this flag can never be turned off.
    parser.add_argument("--save_model", default=True, action='store_true',
                        help="whether to save the trained model")
    parser.add_argument("--early_stopping", default=False, action='store_true',
                        help="whether to apply early stopping")
    parser.add_argument("--epoch", default=20, type=int, help="maximum number of epochs")
    parser.add_argument("--learning_rate", default=0.001, type=float, help="learning rate")
    parser.add_argument("--gpu", default=0, type=int, help="index of the GPU to use")
    options = parser.parse_args()

    if options.mode == "train":
        data, label_cnt = read_data(dir="raw_data/{}".format(task_name),
                                    train="train", dev="dev", test="test")
    else:
        data, label_cnt = read_data(dir="raw_data/{}".format(task_name), test="test")
    print(label_cnt)
    data["classes"] = sorted(list(set(data["train_y"])))

    params = {
        "MODEL": options.model,
        "DATASET": options.dataset,
        "SAVE_MODEL": options.save_model,
        "EARLY_STOPPING": options.early_stopping,
        "EPOCH": options.epoch,
        "LEARNING_RATE": options.learning_rate,
        # "MAX_SENT_LEN": max([len(sent) for sent in data["train_x"] + data["dev_x"] + data["test_x"]]),
        "MAX_SENT_LEN": 32,
        "BATCH_SIZE": 50,
        "CLASS_SIZE": len(data["classes"]),
        "DROPOUT_PROB": 0.5,
        "NORM_LIMIT": 3,
        "GPU": options.gpu,
        "H_DIM": 32,
    }

    print("=" * 20 + "INFORMATION" + "=" * 20)
    print("MODEL:", params["MODEL"])
    print("DATASET:", params["DATASET"])
    print("EPOCH:", params["EPOCH"])
    print("LEARNING_RATE:", params["LEARNING_RATE"])
    print("EARLY_STOPPING:", params["EARLY_STOPPING"])
    print("SAVE_MODEL:", params["SAVE_MODEL"])
    print("=" * 20 + "INFORMATION" + "=" * 20)

    if options.mode == "train":
        print("=" * 20 + "TRAINING STARTED" + "=" * 20)
        model = train(data, params)
        if params["SAVE_MODEL"]:
            save_cls(model, task_name, "attn.{}".format(model_name))
            # save_vocab(data["vocab"], task_name, model_name)
        print("=" * 20 + "TRAINING FINISHED" + "=" * 20)
    else:
        model = load_cls(task_name, model_name).cuda(params["GPU"])
        test_acc = test(data, model, params)
        print("test acc:", test_acc)
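# Why the NOTE above matters: argparse's store_true action can only ever set a
# flag to True, so "--save_model" with default=True is always on. A minimal,
# self-contained sketch of a paired on/off flag (names are illustrative, not
# part of this repo):
import argparse

_p = argparse.ArgumentParser()
_p.add_argument("--save_model", dest="save_model", action="store_true")
_p.add_argument("--no_save_model", dest="save_model", action="store_false")
_p.set_defaults(save_model=True)
assert _p.parse_args([]).save_model is True
assert _p.parse_args(["--no_save_model"]).save_model is False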
        idx = mask.cpu().numpy()
        idx = [int(ix) for ix in idx]
        contents = []
        for i in range(0, len(x)):
            if i not in idx:
                contents.append(x[i])
        wl = {
            "content": ' '.join(contents),
            "line": line.strip(),
            "masks": list(idx),
            "label": str(label)
        }
        # print(wl)
        wl_str = json.dumps(wl)
        fw.write(wl_str)
        fw.write("\n")
    fw.close()
    print("processed over!")


if __name__ == "__main__":
    cls = load_cls("{}".format(task_name), "attn.{}".format(model_name)).cuda()
    for i in cls.parameters():
        i.requires_grad = False
    cls.eval()
    cls_tf_idf(cls, 1, "train")
    cls_tf_idf(cls, 0, "train")
    cls_tf_idf(cls, 1, "dev")
    cls_tf_idf(cls, 0, "dev")
    # cls_tf_idf(cls, 1, "test")
    # cls_tf_idf(cls, 0, "test")
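# Each line written above is a single JSON record. A minimal sketch of reading
# the output back (the file name here is illustrative, not taken from the repo):
import json

with open("yelp.data.train", "r") as fr:  # hypothetical output file
    for raw in fr:
        rec = json.loads(raw)
        # "line" is the original sentence, "masks" the dropped token positions,
        # "content" the sentence with those tokens removed, "label" the style label
        print(rec["label"], rec["content"])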
    try:
        # legacy Python 2 code re-encoded here (.decode('utf-8').encode('gb18030'));
        # in Python 3 the line is already str, so split it directly
        lines = line.strip().split('\t')
    except Exception:
        continue
    if len(lines) != 2:
        continue
    # string.atof() is Python 2 only; float() is the Python 3 equivalent
    if float(lines[1]) > dict_thre and num < dict_num:
        word_dict[lines[0]] = 1
        num += 1
f.close()

frname = os.path.join(save_path, sys.argv[1])  # NOTE: unused; the raw argv path is opened below
f = open(sys.argv[1], 'r')
fwname = os.path.join(save_path, sys.argv[6] + '.data.' + operation)
fw = open(fwname, 'w')
cls = load_cls("{}".format(sys.argv[7]), "attn.cbert").cuda()
for i in cls.parameters():
    i.requires_grad = False


def cmp(a, b):
    # drop-in replacement for Python 2's removed built-in cmp()
    return (a > b) - (a < b)


def cls_tf_idf(batch_lines):
    batch_x = [clean_str(sent) for sent in batch_lines]
    pred, attn = cls(batch_x)
    pred = np.argmax(pred.cpu().data.numpy(), axis=1)
    ret = []
    for line, x, pre, att in zip(batch_lines, batch_x, pred, attn):
        if len(x) > 0:
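# The dictionary-loading loop at the top of this file expects a two-column
# file, "word<TAB>score", one entry per line; words whose score exceeds
# dict_thre are kept, up to dict_num entries. A self-contained sketch of that
# filter (the threshold values are illustrative):
word_dict, num = {}, 0
dict_thre, dict_num = 5.0, 10000
for entry in ["great\t7.2", "the\t0.3", "awful\t6.1"]:
    word, score = entry.strip().split('\t')
    if float(score) > dict_thre and num < dict_num:
        word_dict[word] = 1
        num += 1
print(word_dict)  # {'great': 1, 'awful': 1}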
def run_aug(args, save_every_epoch=False):
    processors = {
        "yelp": biLabelProcessor,
        "amazon": biLabelProcessor,
        "imagecaption": biLabelProcessor,
    }

    if args.local_rank == -1 or args.no_cuda:
        device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
        n_gpu = torch.cuda.device_count()
    else:
        device = torch.device("cuda", args.local_rank)
        n_gpu = 1
        # Initialize the distributed backend, which takes care of synchronizing nodes/GPUs
        torch.distributed.init_process_group(backend='nccl')

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    os.makedirs(args.output_dir, exist_ok=True)

    task_name = args.task_name.lower()
    if task_name not in processors:
        raise ValueError("Task not found: %s" % task_name)
    processor = processors[task_name]()
    label_list = processor.get_labels(task_name)

    def load_model(model_name):
        weights_path = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, model_name)
        model = torch.load(weights_path)
        return model

    cbert_name = "{}/CBertForMaskedLM_{}_epoch_{}{}".format(
        task_name.lower(), task_name.lower(), args.test_epoch, modified)
    model = load_model(cbert_name)
    model.to(device)

    cls_model = load_cls(task_name, model_name).cuda()
    for i in cls_model.parameters():
        i.requires_grad = False
    cls_model.eval()

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']

    if args.do_eval:
        # eval_bleu parameters
        generate_file_0 = "evaluation/outputs/{}/sentiment.test.0.{}".format(task_name, model_name)
        dev_file_0 = "evaluation/outputs/{}/sentiment.dev.0.{}".format(task_name, model_name)
        orgin_file_0 = "evaluation/outputs/{}/sentiment.test.0.human".format(task_name)
        generate_file_1 = "evaluation/outputs/{}/sentiment.test.1.{}".format(task_name, model_name)
        dev_file_1 = "evaluation/outputs/{}/sentiment.dev.1.{}".format(task_name, model_name)
        orgin_file_1 = "evaluation/outputs/{}/sentiment.test.1.human".format(task_name)
        save_file_path = "evaluation/outputs/{}/{}_ft_wc{}".format(task_name, model_name, modified)
        if not os.path.exists(save_file_path):
            os.mkdir(save_file_path)

        # eval_acc parameters
        dict_file = 'test_tools/li_test_tool/classify_Bilstm/data/style_transfer/zhi.dict.{}'.format(
            task_name)
        if task_name == 'yelp':
            train_rate = 0.9984
            valid_rate = 0.0008
            test_rate = 0.0008
        elif task_name == 'amazon':
            train_rate = 0.9989
            valid_rate = 0.00055
            test_rate = 0.00055

        run_transfer(model, tokenizer, task_name,
                     model_name=model_name, modified=modified, set="dev")
        dev_acc_0 = 1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                 valid_rate=valid_rate, test_rate=test_rate,
                                 input_file=dev_file_0)
        dev_acc_1 = 1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                 valid_rate=valid_rate, test_rate=test_rate,
                                 input_file=dev_file_1)
        dev_acc_avg = (dev_acc_0 + dev_acc_1) / 2
        dev_acc_avg = round(dev_acc_avg * 1000) / 10.0
        print('{{"dev acc":{}}}'.format(dev_acc_avg))

        avg_loss = 0
        run_transfer(model, tokenizer, task_name, model_name=model_name, modified=modified)
        bleu_0 = eval_bleu(generate_file=generate_file_0, orgin_file=orgin_file_0) * 100
        bleu_1 = eval_bleu(generate_file=generate_file_1, orgin_file=orgin_file_1) * 100
        bleu_avg = (bleu_0 + bleu_1) / 2
        print('{{"bleu_0": {}, "bleu_1": {}, "bleu_avg": {}}}'.format(
            bleu_0, bleu_1, round(bleu_avg * 10) / 10.0))
        acc_0 = (1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                              valid_rate=valid_rate, test_rate=test_rate,
                              input_file=generate_file_0)) * 100
        acc_1 = (1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                              valid_rate=valid_rate, test_rate=test_rate,
                              input_file=generate_file_1)) * 100
        acc_avg = (acc_0 + acc_1) / 2
        print('{{"acc_0": {}, "acc_1": {}, "acc_avg": {}}}'.format(
            acc_0, acc_1, round(acc_avg * 10) / 10.0))
        _acc = cls_test(cls_model, task_name) * 100
        run_split(generate_file_0)
        run_split(generate_file_1)
        _bleu = eval_multi_bleu(model_name, task_name)
        print('{{"_ACCU": {}, "_BLEU": {}}}'.format(
            round(_acc * 10) / 10.0, round(_bleu * 10) / 10.0))
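# eval_bleu / eval_acc / eval_multi_bleu above are this repo's own evaluation
# helpers. For orientation only, a rough file-level BLEU in the same spirit,
# using NLTK's corpus_bleu (an assumption about the metric, not the repo's
# actual implementation):
from nltk.translate.bleu_score import corpus_bleu

def bleu_between_files(generate_file, orgin_file):
    # one whitespace-tokenized hypothesis per generated line,
    # one reference per human-written line
    with open(generate_file) as g, open(orgin_file) as o:
        hyps = [line.split() for line in g]
        refs = [[line.split()] for line in o]
    return corpus_bleu(refs, hyps)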
def main():
    save_every_epoch = False
    args, train_dataloader, t_total, device, n_gpu = load_data()
    print("**********************************************************")
    print(args)

    def load_model(model_name):
        weights_path = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, model_name)
        model = torch.load(weights_path)
        return model

    cbert_name = "{}/CBertForMaskedLM_{}_epoch_10{}".format(
        task_name.lower(), task_name.lower(), modified)
    model = load_model(cbert_name)
    model.to(device)

    cls_model = load_cls(task_name, model_name).cuda()
    for i in cls_model.parameters():
        i.requires_grad = False
    cls_model.eval()

    if args.local_rank != -1:
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
    elif n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Prepare optimizer: no weight decay for bias/LayerNorm parameters
    param_optimizer = list(model.named_parameters())
    no_decay = ['bias', 'gamma', 'beta']
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
        'weight_decay_rate': 0.01
    }, {
        'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
        'weight_decay_rate': 0.0
    }]
    optimizer = BertAdam(optimizer_grouped_parameters,
                         lr=args.learning_rate,
                         warmup=args.warmup_proportion,
                         t_total=t_total)
    global_step = 0
    model.train()

    save_model_dir = os.path.join(PYTORCH_PRETRAINED_BERT_CACHE, task_name)
    if not os.path.exists(save_model_dir):
        os.mkdir(save_model_dir)
    cls_criterion = nn.CrossEntropyLoss()

    # eval_bleu parameters
    generate_file_0 = "evaluation/outputs/{}/sentiment.test.0.{}".format(task_name, model_name)
    dev_file_0 = "evaluation/outputs/{}/sentiment.dev.0.{}".format(task_name, model_name)
    orgin_file_0 = "evaluation/outputs/{}/sentiment.test.0.human".format(task_name)
    generate_file_1 = "evaluation/outputs/{}/sentiment.test.1.{}".format(task_name, model_name)
    dev_file_1 = "evaluation/outputs/{}/sentiment.dev.1.{}".format(task_name, model_name)
    orgin_file_1 = "evaluation/outputs/{}/sentiment.test.1.human".format(task_name)
    save_file_path = "evaluation/outputs/{}/{}_ft_wc{}".format(task_name, model_name, modified)
    if not os.path.exists(save_file_path):
        os.mkdir(save_file_path)

    # eval_acc parameters
    dict_file = 'test_tools/li_test_tool/classify_Bilstm/data/style_transfer/zhi.dict.{}'.format(
        task_name)
    if task_name == 'yelp':
        train_rate = 0.9984
        valid_rate = 0.0008
        test_rate = 0.0008
    elif task_name == 'amazon':
        train_rate = 0.9989
        valid_rate = 0.00055
        test_rate = 0.00055
    else:
        train_rate = 0.9984
        valid_rate = 0.0008
        test_rate = 0.0008

    acc_save_dict = {}
    bleu_save_dict = {}
    _acc_save_dict = {}
    _bleu_save_dict = {}
    count_dict = {}
    dev_acc_best = 0

    for e in trange(int(args.num_train_epochs), desc="Epoch"):
        tr_loss, avg_loss, avg_acc = 0, 0, 0.
        nb_tr_examples, nb_tr_steps = 0, 0
        for step, batch in enumerate(tqdm(train_dataloader)):
            batch = tuple(t.to(device) for t in batch)
            input_ids, input_mask, segment_ids, label_ids = batch
            lm_loss, _ = model(input_ids, segment_ids, input_mask, label_ids)
            # flip the style segment so the MLM decodes toward the opposite style
            segment_ids = 1 - segment_ids
            prediction_scores = model(input_ids, segment_ids, input_mask)
            prediction_scores = F.softmax(prediction_scores, dim=2)
            # soft word embeddings: expected embedding under the predicted
            # vocabulary distribution, which keeps the decoding differentiable
            predicted_ids = prediction_scores @ bert_embeddings.weight
            batch_y = torch.stack([1 - b[0] for b in batch[2]])
            pred, _ = cls(cls_model, predicted_ids, batch_y)
            # pred = F.softmax(pred, dim=1)
            cls_loss = cls_criterion(pred, batch_y)
            # down-weight the LM loss until it has roughly converged
            if lm_loss.item() > 1.5:
                loss = lm_loss / 100000 + cls_loss
            else:
                loss = cls_loss  # + lm_loss
            loss.backward()
            # tr_loss += cls_loss.item()
            avg_loss += cls_loss.item()
            # avg_acc += cls_acc
            nb_tr_examples += input_ids.size(0)
            nb_tr_steps += 1
            if (step + 1) % args.gradient_accumulation_steps == 0:
                optimizer.step()
                model.zero_grad()
                global_step += 1
            if (step + 1) % 250 == 0:
                # NOTE: avg_loss is only reset inside the disabled block below,
                # so this prints a running sum over the epoch divided by 250
                print("-------avg_loss: {}, lm_loss: {}--------".format(
                    avg_loss / 250, lm_loss))
            if (step + 1) % 250 == 0 and False:  # periodic dev evaluation, currently disabled
                run_transfer(model, tokenizer, task_name,
                             model_name=model_name, modified=modified, set="dev")
                dev_acc_0 = 1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                         valid_rate=valid_rate, test_rate=test_rate,
                                         input_file=dev_file_0)
                dev_acc_1 = 1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                         valid_rate=valid_rate, test_rate=test_rate,
                                         input_file=dev_file_1)
                dev_acc_avg = (dev_acc_0 + dev_acc_1) / 2
                dev_acc_avg = round(dev_acc_avg * 1000) / 10.0
                print('{{"dev acc":{}}}'.format(dev_acc_avg))
                avg_loss = 0
                run_transfer(model, tokenizer, task_name,
                             model_name=model_name, modified=modified)
                bleu_0 = eval_bleu(generate_file=generate_file_0, orgin_file=orgin_file_0) * 100
                bleu_1 = eval_bleu(generate_file=generate_file_1, orgin_file=orgin_file_1) * 100
                bleu_avg = (bleu_0 + bleu_1) / 2
                print('{{"bleu_0": {}, "bleu_1": {}, "bleu_avg": {}}}'.format(
                    bleu_0, bleu_1, round(bleu_avg * 10) / 10.0))
                acc_0 = (1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                      valid_rate=valid_rate, test_rate=test_rate,
                                      input_file=generate_file_0)) * 100
                acc_1 = (1 - eval_acc(dict_file=dict_file, train_rate=train_rate,
                                      valid_rate=valid_rate, test_rate=test_rate,
                                      input_file=generate_file_1)) * 100
                acc_avg = (acc_0 + acc_1) / 2
                print('{{"acc_0": {}, "acc_1": {}, "acc_avg": {}}}'.format(
                    acc_0, acc_1, round(acc_avg * 10) / 10.0))
                _acc = cls_test(cls_model, task_name) * 100
                run_split(generate_file_0)
                run_split(generate_file_1)
                _bleu = eval_multi_bleu(model_name, task_name)
                print('{{"_ACCU": {}, "_BLEU": {}}}'.format(
                    round(_acc * 10) / 10.0, round(_bleu * 10) / 10.0))
                if acc_avg > acc_threshold and dev_acc_avg > acc_threshold:
                    if not (dev_acc_avg < dev_acc_best):
                        # save_model_name = "BertForMaskedLM_" + task_name + "_acc_" + str(acc) + "w_cls"
                        # save_model_path = os.path.join(save_model_dir, save_model_name)
                        # torch.save(model, save_model_path)
                        save_file_name_0 = os.path.join(
                            save_file_path,
                            "sentiment.test.0.{}.{}.{}".format(
                                model_name, round(acc_avg * 10) / 10.0,
                                round(bleu_avg * 10) / 10.0))
                        shutil.copy(generate_file_0, save_file_name_0)
                        save_file_name_1 = os.path.join(
                            save_file_path,
                            "sentiment.test.1.{}.{}.{}".format(
                                model_name, round(acc_avg * 10) / 10.0,
                                round(bleu_avg * 10) / 10.0))
                        shutil.copy(generate_file_1, save_file_name_1)
                        if dev_acc_avg > dev_acc_best:
                            dev_acc_best = dev_acc_avg
                            acc_save_dict[dev_acc_avg] = acc_avg
                            bleu_save_dict[dev_acc_avg] = bleu_avg
                            _acc_save_dict[dev_acc_avg] = _acc
                            _bleu_save_dict[dev_acc_avg] = _bleu
                            count_dict[dev_acc_avg] = 1
                        elif dev_acc_avg == dev_acc_best:
                            acc_save_dict[dev_acc_avg] += acc_avg
                            bleu_save_dict[dev_acc_avg] += bleu_avg
                            _acc_save_dict[dev_acc_avg] += _acc
                            _bleu_save_dict[dev_acc_avg] += _bleu
                            count_dict[dev_acc_avg] += 1

        if save_every_epoch:
            save_model_name = ("CBertForMaskedLM_" + task_name +
                               "_w_cls_epoch" + str(e + 1) + modified)
            save_model_path = os.path.join(save_model_dir, save_model_name)
            torch.save(model, save_model_path)
        else:
            if (e + 1) % 10 == 0:
                save_model_name = ("CBertForMaskedLM_" + task_name +
                                   "_w_cls_epoch" + str(e + 1) + modified)
                save_model_path = os.path.join(save_model_dir, save_model_name)
                torch.save(model, save_model_path)

    if False:  # summary of the best dev-accuracy run, currently disabled
        cnt_best = count_dict[dev_acc_best]
        acc_best = round(acc_save_dict[dev_acc_best] * 10.0 / cnt_best) / 10.0
        bleu_best = round(bleu_save_dict[dev_acc_best] * 10.0 / cnt_best) / 10.0
        _acc_best = round(_acc_save_dict[dev_acc_best] * 10.0 / cnt_best) / 10.0
        _bleu_best = round(_bleu_save_dict[dev_acc_best] * 10.0 / cnt_best) / 10.0
        print("Best result: dev_acc {} acc {} bleu {} _acc {} _bleu {}".format(
            dev_acc_best, acc_best, bleu_best, _acc_best, _bleu_best))
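# The differentiable-decoding trick used in the step loop above, in isolation:
# argmax over the MLM's vocabulary distribution is not differentiable, so the
# softmax is multiplied into the embedding matrix instead, giving an expected
# ("soft") embedding per position through which the classifier loss can reach
# the MLM. A self-contained sketch (shapes and sizes are illustrative):
import torch
import torch.nn.functional as F

vocab_size, hidden = 30522, 768
embedding = torch.nn.Embedding(vocab_size, hidden)
logits = torch.randn(2, 16, vocab_size, requires_grad=True)  # (batch, seq, vocab)
probs = F.softmax(logits, dim=2)
soft_embeds = probs @ embedding.weight  # (batch, seq, hidden), still differentiable
soft_embeds.sum().backward()
print(logits.grad.shape)  # torch.Size([2, 16, 30522])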
import numpy as np
import torch
from torch.autograd import Variable

from utils import read_data, read_test_data, load_cls, load_vocab


def test_acc(model):
    data = read_test_data(dir="evaluation/outputs/yelp")
    x = data["test_x"]
    y = data["test_y"]
    model.eval()
    x = [sent for sent in x]
    pred = np.argmax(model(x).cpu().data.numpy(), axis=1)
    # renamed the loop variable so it no longer shadows the label list y
    acc = sum([1 if p == t else 0 for p, t in zip(pred, y)]) / len(pred)
    return acc


if __name__ == "__main__":
    cls = load_cls("yelp").cuda()
    print(test_acc(cls))
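# test_acc feeds the entire test split through the classifier in one call; a
# batched variant in the same style (a sketch; the batch size is arbitrary)
# avoids GPU memory spikes on larger test sets:
def test_acc_batched(model, batch_size=50):
    data = read_test_data(dir="evaluation/outputs/yelp")
    x, y = data["test_x"], data["test_y"]
    model.eval()
    preds = []
    for i in range(0, len(x), batch_size):
        preds.extend(np.argmax(model(x[i:i + batch_size]).cpu().data.numpy(), axis=1))
    return sum(1 for p, t in zip(preds, y) if p == t) / len(preds)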