def train(data_source):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    hidden_lang = model.init_hidden(args.batch_size)
    criterion = nn.CrossEntropyLoss()
    batch_idx = 0
    num_batch = math.ceil(data_source.size(0) / args.bptt)
    # print(num_batch, data_source.size(0), args.bptt)
    indices = np.arange(num_batch)
    np.random.shuffle(indices)
    for batch, i in enumerate(range(0, data_source.size(0) - 1, args.bptt)):
        sys.stdout.flush()
        # print(">>", batch, i, indices[batch] * args.bptt)
        data, targets = get_batch(data_source, indices[batch] * args.bptt)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        hidden_lang = repackage_hidden(hidden_lang)
        model.zero_grad()
        output, hidden = model(data, hidden)
        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        batch_idx += data.size(1)

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        opt = optim.SGD(model.parameters(), lr=lr)
        opt.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            log = ('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                   'word_loss {:5.2f} | ppl {:8.2f}').format(
                epoch, batch, len(data_source) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))
            printhelper.print_log(log_file, log)
            total_loss = 0
            start_time = time.time()
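# `repackage_hidden` is defined elsewhere in this repo; as a reference, a minimal sketch,
# assuming it matches the standard PyTorch word_language_model helper, detaches the hidden
# state (recursively, for LSTM (h, c) tuples) so backprop stops at the batch boundary:
def repackage_hidden(h):
    """Wrap hidden states in new Tensors, detaching them from their history."""
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)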
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.nlayers, args.batch_size, args.nhid)
    hidden_postag = model.init_hidden(args.postagnlayers, args.batch_size, args.postagnhid)
    criterion = nn.CrossEntropyLoss()
    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data_postag, targets_postag = get_batch(train_postag_data, i)
        data, targets = get_batch(train_data, i)
        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to the start of the dataset.
        hidden = repackage_hidden(hidden)
        hidden_postag = repackage_hidden(hidden_postag)
        model.zero_grad()
        output, postag_output, hidden, hidden_postag = model(
            data, data_postag, hidden, hidden_postag)
        postag_loss = criterion(postag_output.view(-1, npostag), targets_postag)
        loss = criterion(output.view(-1, ntokens), targets)
        # Joint objective: weighted sum of the word-level and POS-tag losses.
        accu_loss = alpha * loss + beta * postag_loss
        accu_loss.backward()

        # `clip_grad_norm_` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        opt = optim.SGD(model.parameters(), lr=lr)
        opt.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time
            log = ('| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | '
                   'word_loss {:5.2f} | ppl {:8.2f}').format(
                epoch, batch, len(train_data) // args.bptt, lr,
                elapsed * 1000 / args.log_interval, cur_loss, math.exp(cur_loss))
            printhelper.print_log(log_file, log)
            total_loss = 0
            start_time = time.time()
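# `get_batch` also lives elsewhere in the repo; a minimal sketch, assuming the standard
# word_language_model slicing where targets are the inputs shifted by one position and
# flattened for CrossEntropyLoss (the real helper here may additionally handle padding):
def get_batch(source, i):
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target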
save_path = args.save + "/" + log_name + ".pt"
dir_path = os.path.dirname(os.path.realpath(__file__))

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "data\t:" + str(args.data))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))
printhelper.print_log(log_file, "postag em size\t:" + str(args.postagemsize))

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)


def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
is_pad = False
if args.pad:
    is_pad = args.pad

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "is_pad\t:" + str(is_pad))
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "train_path\t:" + str(args.train_path))
printhelper.print_log(log_file, "valid_path\t:" + str(args.valid_path))
printhelper.print_log(log_file, "test_path\t:" + str(args.test_path))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))


def is_chinese_char(cc):
    return unicodedata.category(cc) == 'Lo'


def is_contain_chinese_word(seq):
    for i in range(len(seq)):
        if is_chinese_char(seq[i]):
is_pad = False
if args.pad:
    is_pad = args.pad

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print("WARNING: You have a CUDA device, so you should probably run with --cuda")
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "is_pad\t:" + str(is_pad))
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))


def is_chinese_char(cc):
    # Treat any character in Unicode category 'Lo' (Letter, other) as a Chinese character.
    return unicodedata.category(cc) == 'Lo'


def is_contain_chinese_word(seq):
    for i in range(len(seq)):
        if is_chinese_char(seq[i]):
            return True
    return False
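# Quick sanity check for the helpers above (illustrative strings, not from the original
# corpus). Note that Unicode category 'Lo' also covers other scripts such as Hiragana or
# Thai, so `is_chinese_char` is a heuristic rather than a strict CJK test.
assert not is_contain_chinese_word("hello world")
assert is_contain_chinese_word("hello 世界")  # '世' and '界' have category 'Lo'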