Example #1
def train(data_source):
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.batch_size)
    hidden_lang = model.init_hidden(args.batch_size)
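    # Note: hidden_lang is initialized and detached each batch below, but the forward
    # pass in this snippet (model(data, hidden)) does not consume it.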
    criterion = nn.CrossEntropyLoss()

    batch_idx = 0

    num_batch = math.ceil(data_source.size(0) / args.bptt)
    # print(num_batch, data_source.size(0), args.bptt)
    indices = np.arange(num_batch)
    np.random.shuffle(indices)

    for batch, i in enumerate(range(0, data_source.size(0) - 1, args.bptt)):
        sys.stdout.flush()
        # print(">>", batch, i,  indices[batch] * args.bptt)
        data, targets = get_batch(data_source, indices[batch] * args.bptt)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        hidden_lang = repackage_hidden(hidden_lang)
        model.zero_grad()

        output, hidden = model(data, hidden)

        loss = criterion(output.view(-1, ntokens), targets)
        loss.backward()
        batch_idx += data.size(1)

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
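        # Constructing a fresh SGD optimizer every batch is harmless here: vanilla SGD
        # keeps no state, so .step() simply applies p -= lr * p.grad. Hoisting the
        # constructor out of the loop would avoid the repeated allocation.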
        opt = optim.SGD(model.parameters(), lr=lr)
        opt.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time

            log = '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | word_loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch,
                len(data_source) // args.bptt,
                lr, elapsed * 1000 / args.log_interval, cur_loss,
                math.exp(cur_loss))

            printhelper.print_log(log_file, log)

            total_loss = 0
            start_time = time.time()
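
Both examples rely on a repackage_hidden helper to implement truncated backpropagation through time, but it is not shown on this page. A minimal sketch, assuming it follows the standard PyTorch word_language_model helper (detaching a tensor, or each tensor in an LSTM state tuple, from its history):

import torch

def repackage_hidden(h):
    # Detach hidden states from their history so gradients stop at the batch boundary.
    if isinstance(h, torch.Tensor):
        return h.detach()
    return tuple(repackage_hidden(v) for v in h)
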
Example #2
def train():
    # Turn on training mode which enables dropout.
    model.train()
    total_loss = 0
    start_time = time.time()
    ntokens = len(corpus.dictionary)
    hidden = model.init_hidden(args.nlayers, args.batch_size, args.nhid)
    hidden_postag = model.init_hidden(args.postagnlayers, args.batch_size,
                                      args.postagnhid)
    criterion = nn.CrossEntropyLoss()

    for batch, i in enumerate(range(0, train_data.size(0) - 1, args.bptt)):
        data_postag, targets_postag = get_batch(train_postag_data, i)
        data, targets = get_batch(train_data, i)

        # Starting each batch, we detach the hidden state from how it was previously produced.
        # If we didn't, the model would try backpropagating all the way to start of the dataset.
        hidden = repackage_hidden(hidden)
        hidden_postag = repackage_hidden(hidden_postag)
        model.zero_grad()

        output, postag_output, hidden, hidden_postag = model(
            data, data_postag, hidden, hidden_postag)

        postag_loss = criterion(postag_output.view(-1, npostag),
                                targets_postag)
        loss = criterion(output.view(-1, ntokens), targets)

        accu_loss = alpha * loss + beta * postag_loss
        accu_loss.backward()

        # `clip_grad_norm` helps prevent the exploding gradient problem in RNNs / LSTMs.
        torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip)
        opt = optim.SGD(model.parameters(), lr=lr)
        opt.step()

        total_loss += loss.data

        if batch % args.log_interval == 0 and batch > 0:
            cur_loss = total_loss.item() / args.log_interval
            elapsed = time.time() - start_time

            log = '| epoch {:3d} | {:5d}/{:5d} batches | lr {:02.2f} | ms/batch {:5.2f} | word_loss {:5.2f} | ppl {:8.2f}'.format(
                epoch, batch,
                len(train_data) // args.bptt,
                lr, elapsed * 1000 / args.log_interval, cur_loss,
                math.exp(cur_loss))

            printhelper.print_log(log_file, log)

            total_loss = 0
            start_time = time.time()
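
The get_batch helper used in both training loops is likewise omitted from these snippets. A sketch, assuming it mirrors the conventional word_language_model version that slices out a sequence of at most args.bptt steps, with next-token targets flattened for CrossEntropyLoss:

def get_batch(source, i):
    # source is a (num_steps, batch_size) tensor produced by batchify (see Example #3).
    seq_len = min(args.bptt, len(source) - 1 - i)
    data = source[i:i + seq_len]
    target = source[i + 1:i + 1 + seq_len].view(-1)
    return data, target
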
Example #3
save_path = args.save + "/" + log_name + ".pt"

dir_path = os.path.dirname(os.path.realpath(__file__))

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "data\t:" + str(args.data))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))
printhelper.print_log(log_file, "postag em size\t:" + str(args.postagemsize))

###############################################################################
# Load data
###############################################################################

corpus = data.Corpus(args.data)
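
data.Corpus is defined elsewhere in the repository; the snippets above only assume that it exposes a dictionary (whose length gives ntokens) and tokenized train/valid/test tensors. A rough sketch of that interface, modeled on the standard PyTorch word_language_model data module (file names such as 'train.txt' are assumptions, not taken from this code):

import os
import torch

class Dictionary:
    def __init__(self):
        self.word2idx = {}
        self.idx2word = []

    def add_word(self, word):
        if word not in self.word2idx:
            self.idx2word.append(word)
            self.word2idx[word] = len(self.idx2word) - 1
        return self.word2idx[word]

    def __len__(self):
        return len(self.idx2word)

class Corpus:
    def __init__(self, path):
        self.dictionary = Dictionary()
        self.train = self.tokenize(os.path.join(path, 'train.txt'))
        self.valid = self.tokenize(os.path.join(path, 'valid.txt'))
        self.test = self.tokenize(os.path.join(path, 'test.txt'))

    def tokenize(self, path):
        # Add every word to the dictionary and return the file as a 1-D LongTensor of ids.
        ids = []
        with open(path, 'r', encoding='utf-8') as f:
            for line in f:
                for word in line.split() + ['<eos>']:
                    ids.append(self.dictionary.add_word(word))
        return torch.LongTensor(ids)
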


def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
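
The batchify function is cut off at this point in the example. A sketch of how it typically continues in the standard word_language_model layout, trimming leftover tokens and reshaping the token stream into one column per batch (an assumption about the omitted code, not the author's exact implementation):

def batchify(data, bsz):
    # Work out how cleanly we can divide the dataset into bsz parts.
    nbatch = data.size(0) // bsz
    # Trim off any extra elements that would not cleanly fit (remainders).
    data = data.narrow(0, 0, nbatch * bsz)
    # Evenly divide the data across the bsz batches (one sequence per column).
    data = data.view(bsz, -1).t().contiguous()
    return data.cuda() if args.cuda else data
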
Example #4
is_pad = False
if args.pad:
    is_pad = args.pad

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "is_pad\t:" + str(is_pad))
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "train_path\t:" + str(args.train_path))
printhelper.print_log(log_file, "valid_path\t:" + str(args.valid_path))
printhelper.print_log(log_file, "test_path\t:" + str(args.test_path))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))


def is_chinese_char(cc):
    return unicodedata.category(cc) == 'Lo'


def is_contain_chinese_word(seq):
    for i in range(len(seq)):
        if is_chinese_char(seq[i]):
            return True
    return False
Example #5
is_pad = False
if args.pad:
    is_pad = args.pad

# Set the random seed manually for reproducibility.
torch.manual_seed(args.seed)
if torch.cuda.is_available():
    if not args.cuda:
        print(
            "WARNING: You have a CUDA device, so you should probably run with --cuda"
        )
    else:
        torch.cuda.manual_seed(args.seed)

# Write all summary
printhelper.print_log(log_file, "is_pad\t:" + str(is_pad))
printhelper.print_log(log_file, "clip\t:" + str(args.clip))
printhelper.print_log(log_file, "start lr\t:" + str(args.lr))
printhelper.print_log(log_file, "em size\t:" + str(args.emsize))


def is_chinese_char(cc):
    return unicodedata.category(cc) == 'Lo'


def is_contain_chinese_word(seq):
    for i in range(len(seq)):
        if is_chinese_char(seq[i]):
            return True
    return False
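
Note that unicodedata.category(cc) == 'Lo' ("Letter, other") matches far more than Chinese: kana, Thai, Hebrew, and many other unicased scripts fall in the same category, so is_chinese_char is really an "other letter" test. If a stricter check were needed, one common alternative (a hypothetical helper, not part of the original code) is to test the CJK Unified Ideographs range directly:

def is_cjk_ideograph(cc):
    # Restrict to the basic CJK Unified Ideographs block (U+4E00..U+9FFF).
    return '\u4e00' <= cc <= '\u9fff'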