def train(x, y):
    model = TextCNN().cuda()
    # Materialize the trainable parameters as a list; a bare filter()
    # generator would be exhausted after the first gradient-clipping call.
    parameters = list(filter(lambda p: p.requires_grad, model.parameters()))
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(reduction='sum')  # size_average=False is deprecated
    for epoch in range(100):
        total = 0
        for i in range(len(x) // 64):  # integer division for the batch count
            batch_x = torch.FloatTensor(x[i * 64:(i + 1) * 64]).cuda()  # Variable is deprecated
            batch_y = torch.LongTensor(y[i * 64:(i + 1) * 64]).cuda()
            optimizer.zero_grad()
            model.train()
            pred = model(batch_x, 64)
            loss = criterion(pred, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(parameters, max_norm=3)  # clip_grad_norm is deprecated
            total += np.sum(
                pred.data.max(1)[1].cpu().numpy() == batch_y.cpu().numpy())
            optimizer.step()
        print("epoch", epoch + 1, "acc:", float(total) / len(x))
    return model
def train_text_cnn(argv=None):
    # Load dataset
    train_dl, valid_dl, test_dl, TEXT, _ = get_dataloaders(SEED, args)
    # Create net
    filter_sizes = [int(i) for i in args.filter_sizes.split(',')]
    num_vocab = len(TEXT.vocab)
    EMB_DIM = 100
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    output_dim = 2
    print('Dictionary size: {}'.format(num_vocab))
    text_cnn = TextCNN(num_vocab, EMB_DIM, args.num_filters, filter_sizes,
                       output_dim, args.dropout_r, pad_idx).to(args.device)
    # Load the pretrained embeddings
    pretrained_embeddings = TEXT.vocab.vectors
    text_cnn.embedding.weight.data.copy_(pretrained_embeddings)
    # Zero-initialize the <unk> and <pad> embeddings
    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    text_cnn.embedding.weight.data[unk_idx] = torch.zeros(EMB_DIM)
    text_cnn.embedding.weight.data[pad_idx] = torch.zeros(EMB_DIM)
    # Freeze the embedding layer: requires_grad must be set on the weight
    # tensor, not on the module, for the freeze to take effect
    text_cnn.embedding.weight.requires_grad = False
    # Set up loss and optimizer
    loss_func = torch.nn.CrossEntropyLoss()
    acc_func = categorical_accuracy
    opt = torch.optim.Adam(text_cnn.parameters(), lr=args.lr)
    # Start training; note that valid_dl is loaded but the per-epoch
    # evaluation here runs on the test split
    for epoch in range(args.epoch):
        train_single_epoch(text_cnn, loss_func, acc_func, train_dl, opt, epoch)
        evaluate(text_cnn, loss_func, acc_func, test_dl, epoch)
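# train_single_epoch and evaluate are assumed helpers here; a minimal sketch
# of the former, consistent with the arguments above (the torchtext-style
# batch.text / batch.label layout is an assumption, not taken from the snippet):
def train_single_epoch(model, loss_func, acc_func, dataloader, opt, epoch):
    model.train()
    epoch_loss, epoch_acc = 0.0, 0.0
    for batch in dataloader:
        opt.zero_grad()
        logits = model(batch.text)
        loss = loss_func(logits, batch.label)
        loss.backward()
        opt.step()
        epoch_loss += loss.item()
        epoch_acc += acc_func(logits, batch.label).item()
    print(f'epoch {epoch}: loss {epoch_loss / len(dataloader):.4f}, '
          f'acc {epoch_acc / len(dataloader):.4f}')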
def train(**kwargs):
    for k_, v_ in kwargs.items():
        setattr(options, k_, v_)
    training_set = TextDataset(path='data/train/train.csv',
                               model='wordvec/skipgram.bin',
                               max_length=options.max_length,
                               word_dim=options.word_dim)
    training_loader = Data.DataLoader(dataset=training_set,
                                      batch_size=options.batch_size,
                                      shuffle=True,
                                      drop_last=True)
    model = TextCNN(options.word_dim, options.max_length,
                    training_set.encoder.classes_.shape[0])
    if torch.cuda.is_available():
        model.cuda()
    optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)
    criterion = nn.CrossEntropyLoss()  # was referenced via the undefined name `criteration`
    for epoch in tqdm(range(options.epochs)):
        loss_sum = 0
        for data, label in tqdm(training_loader):
            if torch.cuda.is_available():
                data = data.cuda()
                label = label.cuda()
            out = model(data)
            loss = criterion(out, label.squeeze().long())  # Variable is deprecated
            loss_sum += loss.item() / options.batch_size
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        tqdm.write(f'epoch {epoch + 1}: loss = {loss_sum / len(training_set.data)}')
        model.save(f'checkpoints/loss-{loss_sum / len(training_set.data)}.pt')
def train(**kwargs):
    opt.parse(kwargs)
    device = torch.device(
        "cuda:{}".format(opt.gpu_id) if torch.cuda.is_available() else "cpu")
    opt.device = device
    x_text, y = load_data_and_labels("./data/rt-polarity.pos",
                                     "./data/rt-polarity.neg")
    x_train, x_test, y_train, y_test = train_test_split(
        x_text, y, test_size=opt.test_size)
    train_data = Data(x_train, y_train)
    test_data = Data(x_test, y_test)
    train_loader = DataLoader(train_data,
                              batch_size=opt.batch_size,
                              shuffle=True,
                              collate_fn=collate_fn)
    test_loader = DataLoader(test_data,
                             batch_size=opt.batch_size,
                             shuffle=False,
                             collate_fn=collate_fn)
    print("{} train data: {}, test data: {}".format(now(), len(train_data),
                                                    len(test_data)))
    model = TextCNN(opt)
    print("{} init model finished".format(now()))
    if opt.use_gpu:
        model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(),
                           lr=opt.lr,
                           weight_decay=opt.weight_decay)
    for epoch in range(opt.epochs):
        total_loss = 0.0
        model.train()
        for step, batch_data in enumerate(train_loader):
            x, labels = batch_data
            labels = torch.LongTensor(labels)
            if opt.use_gpu:
                labels = labels.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        acc = test(model, test_loader)
        print("{} {} epoch: loss: {}, acc: {}".format(now(), epoch,
                                                      total_loss, acc))
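# test() is not shown in this snippet; a plain accuracy computation matching
# its call site above (a sketch, assuming the same collate format as the
# training loop and the opt.use_gpu / opt.device settings from train()):
def test(model, data_loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for x, labels in data_loader:
            labels = torch.LongTensor(labels)
            if opt.use_gpu:
                labels = labels.to(opt.device)
            preds = model(x).argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    model.train()
    return correct / total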
def train(epochs):
    vocab_size = loader.vocab_size
    num_classes = loader.num_classes
    model = TextCNN(vocab_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(epochs):
        print('-' * 40 + ' epoch {} '.format(epoch) + '-' * 40)
        train_iter(model, loader, criterion, optimizer)
        print()
    torch.save(model.state_dict(), 'cnn.state_dict.pth')
    return
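# train_iter() (one epoch of optimization) is defined elsewhere; a minimal
# sketch consistent with this call, assuming loader yields (inputs, labels):
def train_iter(model, loader, criterion, optimizer):
    model.train()
    for inputs, labels in loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()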
def train(args):
    # Split the data into a training set and a validation set
    train_iter, dev_iter = data_processor.load_data(args)
    print('Data loading finished')
    model = TextCNN(args)
    if args.cuda:
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # Transpose (max_len, batch_size) to (batch_size, max_len).
            # x.t() does not modify x in place, so reassign the result.
            feature = feature.data.t()
            # target.data.sub_(1)  # shift labels down by one if they are 1-indexed
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum and its
                # column index; index [1] selects the predicted class
                corrects = (torch.max(logits, 1)[1] == target).sum()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
def objective(trial):
    model = TextCNN(trial, len(id2vocab), CLS)
    model.to(device)
    optimizer_name = trial.suggest_categorical("optimizer",
                                               ["Adam", "RMSprop", "SGD"])
    lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
    optimizer = getattr(optim, optimizer_name)(model.parameters(), lr=lr)
    criterion = nn.NLLLoss()
    for epoch in range(EPOCHS):
        model.train()
        epoch_loss = []
        for batch in train_iter:
            text_idx_batch = batch.text.t_().to(device)
            label_idx_batch = batch.label.to(device)
            model.zero_grad()
            out = model(text_idx_batch)
            loss = criterion(out, label_idx_batch)
            loss.backward()
            epoch_loss.append(loss.item())
            optimizer.step()
        # print(f'Epoch[{epoch}] - Loss: {sum(epoch_loss) / len(epoch_loss)}')
        model.eval()
        predict_all = np.array([], dtype=int)
        labels_all = np.array([], dtype=int)
        with torch.no_grad():
            for batch in val_iter:
                text_idx_batch = batch.text.t_().to(device)
                label_idx_batch = batch.label
                pred = model(text_idx_batch)
                pred = torch.max(pred.data, 1)[1].cpu().numpy()
                predict_all = np.append(predict_all, pred)
                truth = label_idx_batch.cpu().numpy()
                labels_all = np.append(labels_all, truth)
        acc = metrics.accuracy_score(labels_all, predict_all)
        # report validation accuracy so the pruner can stop bad trials early
        trial.report(acc, epoch)
        if trial.should_prune():
            raise optuna.exceptions.TrialPruned()
    return acc
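# A minimal sketch of how such an objective is typically driven with Optuna's
# standard API (direction, pruner choice, and trial count here are
# illustrative assumptions, not taken from the snippet above):
import optuna

# MedianPruner cooperates with the per-epoch trial.report(acc, epoch) /
# trial.should_prune() calls inside objective()
study = optuna.create_study(direction="maximize",
                            pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=50)
print("Best accuracy:", study.best_value)
print("Best hyperparameters:", study.best_params)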
def train():
    train_contents, train_labels = load_corpus('./dataset/train.txt',
                                               word2id,
                                               max_sen_len=50)
    val_contents, val_labels = load_corpus('./dataset/validation.txt',
                                           word2id,
                                           max_sen_len=50)
    # Merge the training and validation sets
    contents = np.vstack([train_contents, val_contents])
    labels = np.concatenate([train_labels, val_labels])
    # Build the training data loader
    train_dataset = TensorDataset(
        torch.from_numpy(contents).type(torch.float),
        torch.from_numpy(labels).type(torch.long))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    model = TextCNN(config)
    if config.model_path:
        model.load_state_dict(torch.load(config.model_path))
    model.to(device)
    # Set up the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # Set up the loss function
    criterion = nn.CrossEntropyLoss()
    # Training loop
    for epoch in range(config.epochs):
        for batch_idx, (batch_x, batch_y) in enumerate(train_dataloader):
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            output = model(batch_x)
            loss = criterion(output, batch_y)
            # `and`, not bitwise `&`: `==` binds looser than `&`, so the
            # original condition compared batch_idx % 200 against
            # (0 & config.verbose) instead of checking both flags
            if batch_idx % 200 == 0 and config.verbose:
                print("Train Epoch:{}[{}/{} ({:.0f}%)]\tLoss:{:.6f}".format(
                    epoch + 1, batch_idx * len(batch_x),
                    len(train_dataloader.dataset),
                    100. * batch_idx / len(train_dataloader), loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
    # Save the model
    torch.save(model.state_dict(), './models/model.pth')
class Template:
    def __init__(self, args):
        self.config = yaml.load(open('config.yaml', 'r'),
                                Loader=yaml.FullLoader)
        self.config['dataset'] = args.dataset
        self.device = torch.device('cuda:{}'.format(self.config['cuda_index'])
                                   if torch.cuda.is_available() else 'cpu')
        if not os.path.exists(self.config['best_model_path']):
            os.makedirs(self.config['best_model_path'])
        self.set_seed()

    def set_seed(self):
        np.random.seed(self.config['seed'])
        random.seed(self.config['seed'])
        torch.manual_seed(self.config['seed'])
        torch.cuda.manual_seed(self.config['seed'])
        torch.backends.cudnn.deterministic = True

    def train(self, epoch, dataset, mode='train'):
        criterion = nn.CrossEntropyLoss()
        dataiter = tqdm(dataset, total=len(dataset),
                        file=sys.stdout) if mode == 'train' else dataset
        self.metrics.clear()  # the attribute was consistently misspelled `metircs`
        for index, data in enumerate(dataiter):
            self.model.zero_grad()
            input_ids = data['input_ids'].to(self.device)
            input_labels = data['input_labels'].to(self.device)
            predict_labels = self.model(input_ids)
            if mode == 'train':
                loss = criterion(predict_labels, input_labels)
                loss.backward()
                nn.utils.clip_grad_norm_(
                    filter(lambda x: x.requires_grad,
                           self.model.linear.parameters()),
                    self.config['max_grad_norm'])
                self.optimizer.step()
            else:
                with torch.no_grad():
                    loss = criterion(predict_labels, input_labels)
            self.metrics.add_item(loss, predict_labels, input_labels)
            description = "Epoch: {}, loss: {:.4f}, accuracy: {:.4f}".format(
                epoch, *self.metrics.get_score())
            if mode == 'train':
                dataiter.set_description(description)
        if mode != 'train':
            loss, acc = self.metrics.get_score()
            self.metrics.get_final(epoch, mode)
            if self.metrics.best_iter == epoch:
                torch.save(
                    {
                        'epoch': epoch,
                        'model_state_dict': self.model.state_dict(),
                    }, os.path.join(self.config['best_model_path'], 'best.pt'))
            description = "{} score: {}, loss: {:.4f}, accuracy: {:.4f}, best score: {:.4f}".format(
                mode.title(), epoch, loss, acc,
                self.metrics.best_valid if mode == 'valid' else self.metrics.best_test)
            print(description)

    def evaluate(self, epoch, dataset, mode='test'):
        criterion = nn.CrossEntropyLoss()
        dataiter = dataset
        self.metrics.clear()
        path = os.path.join(self.config['best_model_path'], 'best.pt')
        self.model.load_state_dict(torch.load(path)['model_state_dict'])
        self.model.eval()
        for index, data in enumerate(dataiter):
            input_ids = data['input_ids'].to(self.device)
            input_labels = data['input_labels'].to(self.device)
            predict_labels = self.model(input_ids)
            with torch.no_grad():
                loss = criterion(predict_labels, input_labels)
            self.metrics.add_item(loss, predict_labels, input_labels)
        loss, acc = self.metrics.get_score()
        self.metrics.get_final(epoch, 'test')
        description = "{} score: {}, loss: {:.4f}, accuracy: {:.4f}, best score: {:.4f}".format(
            mode.title(), epoch, loss, acc, self.metrics.best_test)
        print(description)

    def forward(self, train_data, valid_data, test_data):
        for epoch in range(self.config['epoch_size']):
            self.model.train()
            self.train(epoch, train_data, 'train')
            self.model.eval()
            self.train(epoch, valid_data, 'valid')
            self.train(epoch, test_data, 'test')
            if epoch - self.metrics.best_iter > self.config['patience']:
                break
        self.evaluate(epoch, test_data, 'test')
        loss, acc = self.metrics.get_score()
        print("Final test: {:.4f}".format(acc))
        return acc, self.metrics.best_test

    def main(self):
        processed_list, alphabet, _, emb_dim = pkl.load(
            open(self.config['res_path'].format(self.config['dataset']), 'rb'))
        if isinstance(processed_list, dict):
            processed_list = [processed_list]
        scores = []
        for data_list in processed_list:
            train_data = MyDatasetLoader(self.config, data_list, 'train').get_data()
            valid_data = MyDatasetLoader(self.config, data_list, 'valid').get_data()
            test_data = MyDatasetLoader(self.config, data_list, 'test').get_data()
            self.model = TextCNN(self.config, alphabet, emb_dim,
                                 self.device).to(self.device)
            self.optimizer = Adam(filter(lambda x: x.requires_grad,
                                         self.model.parameters()),
                                  lr=self.config['lr'],
                                  weight_decay=float(self.config['l2']),
                                  eps=float(self.config['esp']))
            self.metrics = Metric()
            # log parameter shapes and trainability (the original printed
            # this twice, once unnamed and once named)
            for name, param in self.model.named_parameters():
                print(param.shape, name, param.requires_grad)
            score = self.forward(train_data, valid_data, test_data)
            scores.append(score)
        print('| valid best | global best |')
        print('| --- | --- |')
        for w in scores:
            print("| {:.4f} | {:.4f} |".format(w[0], w[1]))
        if len(scores) > 1:
            print("valid Avg\tglobal Avg")
            print("| {:.4f} | {:.4f} |".format(
                np.mean([w[0] for w in scores]),
                np.mean([w[1] for w in scores])))
if args.save_vocab is not None:
    torch.save(text_fields.vocab, str(args.save_vocab))
args.embed_num = len(text_fields.vocab)
if isinstance(args.kernel_sizes, list):
    kernel_sizes = [int(k) for k in args.kernel_sizes]
else:
    # parse a string like "[3,4,5]" into [3, 4, 5]
    kernel_sizes = [int(k) for k in args.kernel_sizes[1:-1].split(',')]
args.kernel_sizes = kernel_sizes

net = TextCNN(args)
print("=========================\nModule\n=========================\n")
print(net)

optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)
steps = 0
best_acc = 0
last_step = 0
net.train()


def save_model(model, save_dir, save_prefix, steps, model_name=None):
    if save_dir is None:
        return
    if not os.path.isdir(save_dir):
        os.makedirs(save_dir)
    if model_name is not None:
        # plausible completion: the original snippet is cut off after this line
        save_path = os.path.join(save_dir, model_name)
    else:
        save_path = os.path.join(save_dir,
                                 '{}_steps_{}.pt'.format(save_prefix, steps))
    torch.save(model.state_dict(), save_path)
def train(self):
    best_valid_loss = 1e9
    all_valid_loss, all_valid_acc = 0, 0
    # CV loop
    for i in range(self.args.cv_num):
        model = TextCNN(self.vocab_size, self.pad_idx, self.args).to(device)
        # model variations (cf. "rand" is the default)
        if self.args.mode == "static":
            model.static_embedding.weight.data.copy_(self.embeddings)
            model.static_embedding.weight.requires_grad = False
        elif self.args.mode == "non-static":
            model.static_embedding.weight.data.normal_(0, 1)  # .weight was missing
            model.static_embedding.weight.data.copy_(self.embeddings)
        elif self.args.mode == "multichannel":
            model.static_embedding.weight.data.copy_(self.embeddings)
            model.static_embedding.weight.requires_grad = False
            model.nonstatic_embedding.weight.data.copy_(self.embeddings)
        optimizer = optim.Adadelta(model.parameters())
        model.train()

        # build the training dataset: fold i is held out as the test set
        print(f'>>> fold {i + 1} is the test set')
        dataset = self.dataset_list.copy()
        del dataset[i]  # remove the test fold
        dataset = functools.reduce(
            lambda x, y: x + y, dataset)  # concatenate the remaining folds
        data_loader = DataLoader(dataset=dataset,
                                 batch_size=self.args.batch_size,
                                 shuffle=True,
                                 collate_fn=self.collate_fn)

        for epoch in range(self.args.epochs):  # epoch loop
            pbar = tqdm(data_loader)
            for text, label in pbar:
                text = text.to(device)
                label = label.to(device)
                optimizer.zero_grad()
                predictions = model(text).squeeze(1)
                loss = self.criterion(predictions, label)
                acc = self._binary_accuracy(predictions, label)
                loss.backward()
                optimizer.step()

                # max-norm scaling: rescale the classifier weights whenever
                # their l2 norm exceeds the constraint
                eps = 1e-7
                param = model.fc.weight
                norm = torch.norm(param)  # l2 norm
                if norm > self.args.l2_constraint:
                    param.data *= self.args.l2_constraint / (eps + norm)
                pbar.set_description(
                    f"loss : {loss.item():.4f}, acc : {acc.item():.4f}")

        valid_loss, valid_acc = self.evaluate(model, i)
        all_valid_loss += valid_loss.item()
        all_valid_acc += valid_acc.item()
        print(f'valid loss : {valid_loss.item():.3f}, '
              f'valid acc : {valid_acc.item():.3f}')
        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(
                model.state_dict(),
                osp.join(self.args.ck_path, f'{self.args.name}_best.pt'))
        if not self.args.cv:
            return
    print()
    print(f'Final loss : {all_valid_loss / self.args.cv_num:.3f}')
    print(f'Final acc : {all_valid_acc / self.args.cv_num:.3f}')
training_iter = data.DataLoader(dataset=training_set,
                                batch_size=config.batch_size,
                                num_workers=2)
deving_iter = data.DataLoader(dataset=deving_set,
                              batch_size=config.batch_size,
                              num_workers=2)
config.word_num = len(training_set.tok2num)
model = TextCNN(config)
if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
training_losses = []

# Train the model
for epoch in range(config.epoch):
    model.train()
    for data, label in training_iter:
        if config.cuda and torch.cuda.is_available():
            data = data.cuda()
            label = label.cuda()  # was assigned to `labels`, leaving `label` on the CPU
        out = model(data)
        loss = criterion(out, label)
        training_losses.append(loss.item())
def main():
    device = torch.device('cuda')
    embedding_vectors = torch.load(f'{EMBEDDINGS_DIR}/vectors.pkl')
    text_processor = TextProcessor(
        wti=pickle.load(open(f'{EMBEDDINGS_DIR}/wti.pkl', 'rb')),
        tokenizer=get_tokenizer('basic_english'),
        standardize=True,
        min_len=3,
    )
    dataset = TextDataset(CORPUS_DIR, text_processor)
    # split into training and test set; computing the second length as the
    # remainder keeps the split valid for any corpus size
    n_train = int(len(dataset) * DATA_SPLIT)
    train_set, test_set = torch.utils.data.random_split(
        dataset, [n_train, len(dataset) - n_train])

    # count the number of samples in each class
    class_count = [0, 0]
    for data, label in dataset:
        class_count[int(label.item())] += 1
    # normalize to relative frequencies
    _sum = sum(class_count)
    class_count[0] /= _sum
    class_count[1] /= _sum
    # invert the frequencies so the rarer class is sampled more often
    class_count = list(reversed(class_count))
    # assign a weight to every training sample
    weights = [class_count[int(x[1].item())] for x in train_set]
    # weighted sampler
    sampler = torch.utils.data.WeightedRandomSampler(
        weights=weights, num_samples=len(train_set), replacement=True)
    train_loader = DataLoader(dataset=train_set,
                              batch_size=32,
                              collate_fn=Sequencer(SEQUENCE_LEN),
                              sampler=sampler)
    test_loader = DataLoader(dataset=test_set,
                             batch_size=32,
                             collate_fn=Sequencer(SEQUENCE_LEN))

    # number of filters per convolutional layer
    N_FILTERS = 64
    # sizes of the convolutional filters
    FILTER_SIZES = [2, 3]
    # dropout between the conv and dense layers
    DROPOUT = 0.5
    model = TextCNN(
        embeddings=embedding_vectors,
        n_filters=N_FILTERS,
        filter_sizes=FILTER_SIZES,
        dropout=DROPOUT,
    ).to(device)
    print(model)
    print('Trainable params:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
    EPOCHS = 12
    best_acc = 0.0

    # training loop
    for epoch in range(EPOCHS):
        print('Epoch', epoch + 1)
        for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
            # word-index vectors and corresponding labels
            x, labels = data
            x = x.to(device)
            labels = labels.to(device)
            predictions = model(x).squeeze()
            loss = criterion(predictions, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # evaluate
        with torch.no_grad():
            model.eval()
            correct = 0
            wrong = 0
            m = [[0, 0], [0, 0]]  # confusion matrix [[TN, FP], [FN, TP]]
            for data in test_loader:
                x, label = data
                x = x.to(device)
                predictions = model(x).squeeze()
                for truth, prediction in zip(label, predictions):
                    y = int(truth.item())
                    y_pred = 1 if prediction.item() > 0.5 else 0
                    m[y][y_pred] += 1
                    if y == y_pred:
                        correct += 1
                    else:
                        wrong += 1
            model.train()

        acc = correct / (correct + wrong)
        if acc > best_acc:
            best_acc = acc
            # the glob pattern must match the filename saved below, or old
            # checkpoints are never cleaned up (the original globbed model_*)
            for file in glob.glob('models/state_*.pth'):
                os.remove(file)
            torch.save(model.state_dict(), f'models/state_{epoch}.pth')
        print()
        print('Correct:', f'{correct}/{correct + wrong}', 'Accuracy:', acc)
        print('[[TN, FP], [FN, TP]]')
        print(m)
        print()

    # interactive evaluation
    model.eval()
    text_processor.do_standardize = True
    with torch.no_grad():
        while True:
            text = input('Prompt: ')
            x = text_processor.process(text)
            x = torch.tensor(x).unsqueeze(dim=0)
            print(model(x.to(device)).squeeze())
training_set = TextDataset(path='data/train')
training_iter = data.DataLoader(dataset=training_set,
                                batch_size=config.batch_size,
                                num_workers=2)
model = TextCNN(config)
embeds = nn.Embedding(config.word_num, config.word_embedding_dimension)
if torch.cuda.is_available():
    model.cuda()
    embeds = embeds.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=config.lr)
count = 0
loss_sum = 0

# Train the model
for epoch in range(config.epoch):
    for data, label in training_iter:
        if config.cuda and torch.cuda.is_available():
            data = data.cuda()
            label = label.cuda()  # was assigned to an unused `labels` name
        input_data = embeds(data)   # Variable is deprecated; tensors track gradients directly
        out = model(input_data)     # was model(data), which bypassed the embedding lookup
        loss = criterion(out, label.long())  # CrossEntropyLoss expects long targets, not float
        loss_sum += loss.item()     # loss.data[0] was removed in PyTorch 0.4
print('building model')
model = TextCNN(args, W)
if torch.cuda.is_available():
    print("USE CUDA")
    model.cuda()

# training buffers; the np.float alias is removed in NumPy 1.24+
test_sub = np.zeros((len(test_df), 9), dtype=np.float64)
test_train = np.zeros((2000, 9), dtype=np.float64)

# Loss and Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    model.parameters()),
                             lr=args.learning_rate)
iter_per_epoch = len(train_data)
print("loader size %d %d" % (len(train_data), len(text_train)))
best_validate_acc = 0.000
best_test_acc = 0.000
best_loss = 100
best_validate_dir = ''
best_list = [0, 0]

for epoch in range(args.num_epochs):
    len_dataloader = min(len(source_loader), len(domain_loader))
    data_source_iter = iter(source_loader)
    data_target_iter = iter(domain_loader)
                                        # (continuation of a DataLoader(...) call truncated above)
                                        batch_size=config.batch_size,
                                        shuffle=False,
                                        num_workers=2)
test_set = MR_Dataset(state="test", k=i, embedding_type="no")
test_iter = torch.utils.data.DataLoader(dataset=test_set,
                                        batch_size=config.batch_size,
                                        shuffle=False,
                                        num_workers=2)
model = TextCNN(config)
if config.cuda and torch.cuda.is_available():
    model.cuda()
    config.embedding_pretrained.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
count = 0
loss_sum = 0

for epoch in range(config.epoch):
    # training phase
    model.train()
    for data, label in training_iter:
        if config.cuda and torch.cuda.is_available():
            data = data.cuda()
            label = label.cuda()
        else:
            data = data.long()        # Variable is deprecated; plain tensors suffice
            label = label.squeeze()
        out = model(data)
        # L2 penalty on the classifier weight matrix
        l2_loss = config.l2 * torch.sum(
            torch.pow(list(model.parameters())[1], 2))
def train():
    # Configuration
    cf = Config('./config.yaml')
    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data, batch_size=cf.batch_size, shuffle=True)
    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data, batch_size=cf.batch_size, shuffle=True)
    # Pretrained embedding matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")
    # Model
    model = TextCNN(cf, torch.tensor(embedding_matrix))
    # Adam optimizer over the trainable parameters
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))
    # Move the model to the target device
    model.to(device)
    # Parallelize across GPUs when several are available
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Training
    start_time = time.time()
    total_batch = 0              # total number of batches seen
    best_acc_val = 0.0           # best validation accuracy so far
    last_improved = 0            # batch index of the last improvement
    require_improvement = 1000   # stop early after 1000 batches without improvement
    flag = False
    model.train()
    for epoch_id in trange(cf.epoch, desc="Epoch"):
        for step, batch in enumerate(
                tqdm(train_dataloader, "batch", total=len(train_dataloader))):
            label_id = batch['label_id'].squeeze(1).to(device)
            segment_ids = batch['segment_ids'].to(device)
            loss = model(segment_ids, label_id)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_batch += 1
            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = model.get_loss_acc(segment_ids, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader, device)
                if acc_val > best_acc_val:
                    # Save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(), "./output/model.bin")
                    improved_str = "*"
                else:
                    improved_str = ""
                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}'
                print(msg.format(total_batch, loss_train, acc_train, loss_val,
                                 acc_val, time_dif, improved_str))
                model.train()
            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping early")
                flag = True
                break
        if flag:
            break
model_name = "textcnn.pt" # do text parsing, get vocab size and class count build_vocab(args.train, args.output_vocab_label, args.output_vocab_word) label2id, id2label = load_vocab(args.output_vocab_label) word2id, id2word = load_vocab(args.output_vocab_word) vocab_size = len(word2id) num_class = len(label2id) # set model model = TextCNN(vocab_size = vocab_size, num_class=num_class, emb_dim=args.embedding_dim, emb_droprate=args.embedding_droprate, seq_len=args.sequence_len, filter_count=args.filter_count, kernel_size=kernel_size, conv_droprate=args.conv_droprate) model.build() model.to(device) criterion = nn.CrossEntropyLoss().to(device) optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=1e-6) writer.add_graph(model, torch.randint(low=0,high=1000, size=(args.batch_size, args.sequence_len), dtype=torch.long).to(device)) # padding sequence with <PAD> def padding(data, fix_length, pad, add_first="", add_last=""): if add_first: data.insert(0, add_first) if add_last: data.append(add_last) pad_data = [] data_len = len(data) for idx in range(fix_length): if idx < data_len: pad_data.append(data[idx]) else: pad_data.append(pad)
def train(args, states=None):
    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")
    dataset, labels = load_embeddings(data_path=config['data'],
                                      label_path=config['labels'])
    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )
    model = TextCNN(
        num_classes=config['num_classes'],
        embedding_size=config['embedding_size'],
        num_filters=config['num_filters'],
        dropout_rate=config['dropout'],
    )
    if torch.cuda.is_available():
        model.cuda()
    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients before each pass
            optimizer.zero_grad()

            # forward
            probs, classes = model(inputs)

            # backprop
            loss = loss_function(probs, labels)
            loss.backward()

            # update/optimize
            optimizer.step()

            # log a summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []
            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir,
                                     step=i,
                                     states=states)
    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
PAD = 0
model_name = 'GoogleNews-vectors-negative300.bin'
word2vec = gensim.models.KeyedVectors.load_word2vec_format(model_name, binary=True)
vocab_file = make_vocab(data_file, vocab_output_file)
vocab2idx = convert_vocab_to_idx(vocab_output_file)
word_embedding = load_word_embedding(vocab2idx, word2vec)

# train/test split
X, Y = load_data(data_file, vocab2idx)
x_train, x_test, y_train, y_test = train_test_split(X, Y,
                                                    test_size=0.1,
                                                    random_state=1)
train_dataset = TensorDataset(torch.from_numpy(x_train), torch.from_numpy(y_train))
test_dataset = TensorDataset(torch.from_numpy(x_test), torch.from_numpy(y_test))
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64)

# model setup
model = TextCNN(word_embedding)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_func = nn.CrossEntropyLoss()

# model training
train(model, device, optimizer, loss_func)
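# The train() helper invoked above is defined elsewhere; a minimal sketch
# consistent with its call signature (the epoch count and the reliance on
# train_loader from this scope are assumptions):
def train(model, device, optimizer, loss_func, epochs=10):
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        for x, y in train_loader:
            x, y = x.to(device), y.long().to(device)
            optimizer.zero_grad()
            loss = loss_func(model(x), y)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'epoch {epoch + 1}: loss = {total_loss / len(train_loader):.4f}')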
def train(config):
    try:
        split = config["split"]
        data_path = config["data_path"]
        pretrained_model_dir = config["pretrained_model_dir"]
        pretrained_model_file = config["pretrained_model_file"]
        last_model_path = config["last_model_path"]
        save_to = config["save_to"]
        min_freq = config["min_freq"]
        batch_size = config["batch_size"]
        max_sent_length = config["max_sent_length"]
        embed_dim = config["embed_dim"]
        filter_num = config["filter_num"]
        filter_widths = config["filter_widths"]
        learning_rate = config["learning_rate"]
        patience = config["patience"]
        lr_decay = config["lr_decay"]
        max_num_trial = config["max_num_trial"]
        max_epoch = config["max_epoch"]
        save_every = config["save_every"]
        cuda = config["cuda"]
        debug = config["debug"]
    except KeyError:
        print("Input Parameter Error")
        exit(1)

    if not Path(save_to).exists():
        Path(save_to).mkdir()
    device = torch.device("cuda:0" if (torch.cuda.is_available() and cuda) else "cpu")

    # build torchtext fields
    TEXT = torchtext.data.Field(tokenize='spacy', lower=True)
    LABEL = torchtext.data.Field(dtype=torch.long)
    train_data, test_data = IMDB.splits(TEXT, LABEL, root=data_path)
    if debug:  # shrink the training set in debug mode
        train_data, val_data = train_data.split(split_ratio=0.1)
    train_data, val_data = train_data.split(split_ratio=0.7)
    train_iter, val_iter = torchtext.data.Iterator.splits(
        (train_data, val_data), batch_size=batch_size, device=device)

    if (pretrained_model_file is not None) and (pretrained_model_dir is not None):
        pretrained_vector = Vectors(name=pretrained_model_file,
                                    cache=pretrained_model_dir)
        TEXT.build_vocab(train_data, min_freq=min_freq, vectors=pretrained_vector)
    LABEL.build_vocab(train_data)
    logging.info("saving TEXT/LABEL vocabulary...")
    with open(f"{save_to}/TEXT_vocab.bin", "wb") as f:
        dill.dump(TEXT, f)
    with open(f"{save_to}/LABEL_vocab.bin", "wb") as f:
        dill.dump(LABEL, f)
    assert embed_dim == TEXT.vocab.vectors.shape[-1], "incompatible embeddings"

    embed_num, class_num = len(TEXT.vocab), len(LABEL.vocab)
    model = TextCNN(embed_num,
                    embed_dim,
                    class_num,
                    filter_num,
                    filter_widths,
                    from_pretrained=TEXT.vocab.vectors).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    # classes are [<unk>, <pad>, 'pos', 'neg']; zero-weight the special tokens
    cross_entropy = nn.CrossEntropyLoss(
        weight=torch.tensor([0, 0, 1.0, 1.0], device=device))

    if last_model_path is not None:
        # load model
        logging.info(f'load model from {last_model_path}')
        params = torch.load(last_model_path,
                            map_location=lambda storage, loc: storage)
        model.load_state_dict(params['state_dict'])
        logging.info('restore parameters of the optimizers')
        optimizer.load_state_dict(torch.load(last_model_path + '.optim'))

    model.train()
    epoch = 0
    cur_trial = 0
    cur_patience = 0
    hist_valid_scores = []
    train_time = begin_time = time.time()
    logging.info("begin training!")
    while True:
        epoch += 1
        train_loss = 0
        step = 0
        for batch in iter(train_iter):
            feature, target = batch.text.T, batch.label.squeeze(0)
            step += 1
            optimizer.zero_grad()
            res = model(feature)
            loss = cross_entropy(res, target)
            train_loss += loss.item()  # accumulate a float, not the graph-bearing tensor
            loss.backward()
            optimizer.step()
        train_loss = train_loss / step

        val_loss, accuracy = evaluate(model, val_iter, cross_entropy)
        logging.info(
            f'epoch {epoch}\t train_loss: {train_loss}\t val_loss: {val_loss}\t '
            f'val_accuracy: {accuracy} speed: {time.time() - train_time:.2f}s/epoch\t '
            f'time elapsed {time.time() - begin_time:.2f}s')
        train_time = time.time()

        is_better = len(hist_valid_scores) == 0 or val_loss < min(hist_valid_scores)
        hist_valid_scores.append(val_loss)

        if epoch % save_every == 0:
            model.save(f"{save_to}/model_step_{epoch}")
            torch.save(optimizer.state_dict(),
                       f"{save_to}/model_step_{epoch}.optim")
        if is_better:
            cur_patience = 0
            model_save_path = f"{save_to}/model_best"
            print(f'save currently the best model to [{model_save_path}]')
            model.save(model_save_path)
            # also save the optimizer's state
            torch.save(optimizer.state_dict(), model_save_path + '.optim')
        elif cur_patience < patience:
            cur_patience += 1
            print('hit patience %d' % cur_patience)
            if cur_patience == patience:
                cur_trial += 1
                print(f'hit #{cur_trial} trial')
                if cur_trial == max_num_trial:
                    print('early stop!')
                    exit(0)

                # decay the lr and restore the previously best checkpoint
                lr = optimizer.param_groups[0]['lr'] * lr_decay
                logging.info(
                    f'load previously best model and decay learning rate to {lr}')
                params = torch.load(model_save_path,
                                    map_location=lambda storage, loc: storage)
                model.load_state_dict(params['state_dict'])
                model = model.to(device)
                logging.info('restore parameters of the optimizers')
                optimizer.load_state_dict(torch.load(model_save_path + '.optim'))

                # set the new lr
                for param_group in optimizer.param_groups:
                    param_group['lr'] = lr

                # reset patience
                cur_patience = 0
        if epoch == max_epoch:
            print('reached maximum number of epochs!')
            exit(0)
writer = SummaryWriter(log_dir=config.tensorboard,
                       flush_secs=int(config.tensorboard_flush_sec))
checkpoint_path = "checkpoints"
# start from a clean checkpoint directory
if os.path.exists(checkpoint_path):
    shutil.rmtree(checkpoint_path)
os.mkdir(checkpoint_path)
# print(config)

# set up the model
model = TextCNN(config)
model.cuda()
criterion = nn.CrossEntropyLoss().cuda()
optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=config.lr,
    eps=config.eps,
    weight_decay=config.weight_decay)
writer.add_graph(
    model,
    torch.randint(low=0,
                  high=100,
                  size=(config.batch_size, 32),
                  dtype=torch.long).cuda())
print(
    summary(
        model,
        torch.randint(low=0,
                      high=100,
                      size=(config.batch_size, 32),  # completed to mirror the add_graph call above
                      dtype=torch.long).cuda()))
# build the embedding weight matrix from the pretrained word vectors
weight = torch.zeros(args.vocab_size, args.embedding_size)
for i in range(len(wvmodel.index2word)):
    try:
        index = word_to_idx[wvmodel.index2word[i]]
    except KeyError:  # skip words that are not in our vocabulary
        continue
    # the original double lookup idx_to_word[word_to_idx[w]] is the identity
    weight[index, :] = torch.from_numpy(
        wvmodel.get_vector(wvmodel.index2word[i]))

# load the model
net = TextCNN(args, weight).to(device)

# define the loss function and the optimizer
criterion = nn.CrossEntropyLoss()  # cross-entropy loss
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad,
                                    net.parameters()),
                             lr=args.lr)

# model training
print('training on ', device)
best_acc = 0.0
step = 0
for epoch in range(1, args.num_epochs + 1):
    net.train()
    for X, y in train_iter:
        X, y = X.to(device), y.to(device)
        y_hat = net(X)            # forward pass: class scores
        loss = criterion(y_hat, y)  # compute the loss
        optimizer.zero_grad()     # reset gradients
        loss.backward()           # backpropagate
        optimizer.step()          # update parameters
embedding_size = 2   # embedding dimension
sequence_length = 3  # tokens per input
num_classes = len(LABEL.vocab)  # 0 or 1
vocab_size = len(TEXT.vocab)
filter_sizes = [2, 2, 2]  # n-gram window sizes
num_filters = 3

batch = next(iter(train_iter))
input_batch = batch.text.to(device)
target_batch = batch.label.to(device)

model = TextCNN(embedding_size, sequence_length, num_classes, filter_sizes,
                num_filters, vocab_size).to(device)
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training
for epoch in range(5000):
    optimizer.zero_grad()
    # output: [batch_size, num_classes], target_batch: [batch_size] (LongTensor, not one-hot)
    output = model(input_batch)
    loss = criterion(output, target_batch)
    if (epoch + 1) % 1000 == 0:
        print('Epoch:', '%04d' % (epoch + 1), 'cost =',
              '{:.6f}'.format(loss.item()))
    loss.backward()
    optimizer.step()
kwargs = {
    'nb_classes': nb_classes,
    'vocab_size': vocab_size,
    'input_size': word_dim,
    'filter_shape': filter_shape,
    'pretrained_embed': pretrained_embed,
    'dropout_rate': dropout_rate
}

# initialize the model (unpack the dict into keyword arguments)
use_cuda = opts.cuda
text_cnn = TextCNN(**kwargs)
print(text_cnn)
if use_cuda:
    text_cnn = text_cnn.cuda()
optimizer = torch.optim.Adam(text_cnn.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# training
t0 = time()
nb_epoch = opts.nb_epoch
max_patience = opts.max_patience
current_patience = 0
root_model = opts.root_model
if not os.path.exists(root_model):
    os.makedirs(root_model)
path_model = os.path.join(root_model, 'textcnn.model')
best_dev_loss = 1000.
for epoch in range(nb_epoch):
    sys.stdout.write('epoch {0} / {1}: \r'.format(epoch, nb_epoch))
    total_loss, dev_loss = 0., 0.
data_analysis(train, valid, TEXT)

if use_bert:
    model = BertForSequenceClassification.from_pretrained(
        'bert-base-uncased', num_labels=num_classes).to(device)
else:
    model = TextCNN(TEXT.vocab, embed_size, num_filters, num_classes,
                    pretrain=pretrain).to(device)
    # model = TextLSTM(TEXT.vocab, embed_size, hidden_size, num_classes, pretrain=pretrain).to(device)
    # model = ESIM(TEXT.vocab, embed_size, hidden_size, num_classes, pretrain=pretrain, dropout=dropout).to(device)
print(model)
total_num = sum(p.numel() for p in model.parameters())
trainable_num = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Total:', total_num)
print('Trainable:', trainable_num)

criterion = torch.nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
# scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)
"""
batch = next(iter(train_iter))
output = model(batch.text)
if use_bert:
    output = output[0]
print(batch.text[:4])
print(batch.label[:4])