Example #1: MarkovFlow training script (unsupervised tagging)
import pickle
import sys
import time

import numpy as np
import torch

# Project-local helpers assumed available from the surrounding repo:
# read_conll, sents_to_vec, generate_seed, data_iter, to_input_tensor, MarkovFlow

def main(args):

    with open(args.word_vec, 'rb') as fin:
        word_vec = pickle.load(fin)
    print('complete loading word vectors')

    train_text, null_index = read_conll(args.train_file)
    if args.test_file != '':
        test_text, null_index = read_conll(args.test_file)
    else:
        test_text = train_text

    train_data = sents_to_vec(word_vec, train_text)
    test_data = sents_to_vec(word_vec, test_text)

    test_tags = [sent["tag"] for sent in test_text]

    num_dims = len(train_data[0][0])
    print('complete reading data')

    print('#training sentences: %d' % len(train_data))
    print('#testing sentences: %d' % len(test_data))

    # log roughly 10 times per epoch; guard against a zero divisor on tiny datasets
    log_niter = max(1, (len(train_data) // args.batch_size) // 10)

    pad = np.zeros(num_dims)
    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device
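    # generate_seed presumably draws a batch of training vectors; the padded
    # tensor it yields is used only for data-dependent parameter initialization.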
    init_seed = to_input_tensor(generate_seed(train_data, args.batch_size),
                                pad,
                                device=device)

    model = MarkovFlow(args, num_dims).to(device)

    model.init_params(init_seed)

    if args.tag_from != '':
        # tag_from presumably names a saved checkpoint; load it before evaluating
        model.load_state_dict(torch.load(args.tag_from, map_location=device))
        model.eval()
        with torch.no_grad():
            accuracy, vm = model.test(test_data,
                                      test_tags,
                                      sentences=test_text,
                                      tagging=True,
                                      path=args.tag_path,
                                      null_index=null_index)
        print('\n***** M1 %f, VM %f, max_var %.4f, min_var %.4f*****\n' %
              (accuracy, vm, model.var.data.max(), model.var.data.min()),
              file=sys.stderr)
        return

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    begin_time = time.time()
    print('begin training')

    train_iter = report_obj = report_jc = report_ll = report_num_words = 0

    # print the accuracy under init params
    model.eval()
    with torch.no_grad():
        accuracy, vm = model.test(test_data, test_tags)
    print('\n*****starting M1 %f, VM %f, max_var %.4f, min_var %.4f*****\n' %
          (accuracy, vm, model.var.data.max(), model.var.data.min()),
          file=sys.stderr)

    model.train()
    for epoch in range(args.epochs):
        # model.print_params()
        report_obj = report_jc = report_ll = report_num_words = 0
        for sents in data_iter(train_data,
                               batch_size=args.batch_size,
                               shuffle=True):
            train_iter += 1
            batch_size = len(sents)
            num_words = sum(len(sent) for sent in sents)
            sents_var, masks = to_input_tensor(sents, pad, device=args.device)
            optimizer.zero_grad()
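            # The model returns the prior log-likelihood of the projected data
            # and a Jacobian penalty; minimizing their (batch-averaged) sum
            # presumably maximizes the full flow objective
            # log p(x) = log p_prior(f(x)) + log|det J_f(x)|.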
            likelihood, jacobian_loss = model(sents_var, masks)
            neg_likelihood_loss = -likelihood

            avg_ll_loss = (neg_likelihood_loss + jacobian_loss) / batch_size

            avg_ll_loss.backward()

            optimizer.step()

            log_likelihood_val = -neg_likelihood_loss.item()
            jacobian_val = -jacobian_loss.item()
            obj_val = log_likelihood_val + jacobian_val

            report_ll += log_likelihood_val
            report_jc += jacobian_val
            report_obj += obj_val
            report_num_words += num_words

            if train_iter % log_niter == 0:
                print('epoch %d, iter %d, log_likelihood %.2f, jacobian %.2f, '
                      'obj %.2f, max_var %.4f, min_var %.4f, time elapsed %.2f sec' %
                      (epoch, train_iter, report_ll / report_num_words,
                       report_jc / report_num_words, report_obj / report_num_words,
                       model.var.max(), model.var.min(), time.time() - begin_time),
                      file=sys.stderr)

        print('\nepoch %d, log_likelihood %.2f, jacobian %.2f, obj %.2f\n' %
              (epoch, report_ll / report_num_words, report_jc / report_num_words,
               report_obj / report_num_words), file=sys.stderr)

        if epoch % args.valid_nepoch == 0:
            model.eval()
            with torch.no_grad():
                accuracy, vm = model.test(test_data, test_tags)
            print('\n*****epoch %d, iter %d, M1 %f, VM %f*****\n' %
                  (epoch, train_iter, accuracy, vm),
                  file=sys.stderr)
            model.train()

        torch.save(model.state_dict(), args.save_path)

    model.eval()
    with torch.no_grad():
        accuracy, vm = model.test(test_data, test_tags)
    print('\ncomplete training, M1 %f, VM %f\n' % (accuracy, vm),
          file=sys.stderr)
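
A minimal sketch of a command-line driver for Example #1. The flag names mirror the attributes the script reads from `args`; the defaults are illustrative assumptions, not values from the original project.

import argparse

def parse_args():
    parser = argparse.ArgumentParser()
    parser.add_argument('--word_vec', required=True, help='pickled word-vector dict')
    parser.add_argument('--train_file', required=True, help='CoNLL-format training file')
    parser.add_argument('--test_file', default='', help='CoNLL-format test file (empty: reuse train)')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--epochs', type=int, default=10)
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--cuda', action='store_true')
    parser.add_argument('--valid_nepoch', type=int, default=1)
    parser.add_argument('--save_path', default='model.pt')
    parser.add_argument('--tag_from', default='', help='checkpoint to tag from (skips training)')
    parser.add_argument('--tag_path', default='tagging.txt')
    return parser.parse_args()

if __name__ == '__main__':
    main(parse_args())
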
Example #2: DMVFlow training script (unsupervised dependency parsing)
import pickle
import sys
import time

import numpy as np
import torch

import dmv  # project-local module providing DMVFlow

# Other project-local helpers assumed available from the surrounding repo:
# read_conll, sents_to_vec, sents_to_tagid, generate_seed, data_iter, to_input_tensor

def main(args):

    with open(args.word_vec, 'rb') as fin:
        word_vec = pickle.load(fin)
    print('complete loading word vectors')

    train_sents, _ = read_conll(args.train_file)
    test_sents, _ = read_conll(args.test_file, max_len=10)
    test_deps = [sent["head"] for sent in test_sents]

    train_emb = sents_to_vec(word_vec, train_sents)
    test_emb = sents_to_vec(word_vec, test_sents)

    num_dims = len(train_emb[0][0])

    train_tagid, tag2id = sents_to_tagid(train_sents)
    print('%d types of tags' % len(tag2id))
    id2tag = {v: k for k, v in tag2id.items()}

    pad = np.zeros(num_dims)
    device = torch.device("cuda" if args.cuda else "cpu")
    args.device = device

    model = dmv.DMVFlow(args, id2tag, num_dims).to(device)

    init_seed = to_input_tensor(generate_seed(train_emb, args.batch_size),
                                pad,
                                device=device)

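    # Initialize the flow and DMV parameters from a seed batch; the training
    # tag ids are presumably used for a tag-based initialization of the
    # per-state Gaussian parameters.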
    with torch.no_grad():
        model.init_params(init_seed, train_tagid, train_emb)
    print('complete init')

    if args.train_from != '':
        model.load_state_dict(torch.load(args.train_from))
        with torch.no_grad():
            directed, undirected = model.test(test_deps,
                                              test_emb,
                                              verbose=False)
        print('acc on length <= 10: #trees %d, undir %2.1f, dir %2.1f'
              % (len(test_deps), 100 * undirected, 100 * directed))

    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)

    # log roughly 5 times per epoch; guard against a zero divisor on tiny datasets
    log_niter = max(1, (len(train_emb) // args.batch_size) // 5)
    report_ll = report_num_words = report_num_sents = epoch = train_iter = 0
    stop_avg_ll = stop_num_words = 0
    stop_avg_ll_last = 1
    dir_last = 0
    begin_time = time.time()

    print('begin training')

    with torch.no_grad():
        directed, undirected = model.test(test_deps, test_emb)
    print('starting acc on length <= 10: #trees %d, undir %2.1f, dir %2.1f' \
          % (len(test_deps), 100 * undirected, 100 * directed))

    for epoch in range(args.epochs):
        report_ll = report_num_sents = report_num_words = 0
        for sents in data_iter(train_emb, batch_size=args.batch_size):
            batch_size = len(sents)
            num_words = sum(len(sent) for sent in sents)
            stop_num_words += num_words
            optimizer.zero_grad()

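            # Pass the embeddings through the invertible projection, then run
            # the DMV inside algorithm (presumably what p_inside implements)
            # to marginalize over dependency trees for the batch log-likelihood.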
            sents_var, masks = to_input_tensor(sents, pad, device)
            sents_var, _ = model.transform(sents_var)
            sents_var = sents_var.transpose(0, 1)
            log_likelihood = model.p_inside(sents_var, masks)

            avg_ll_loss = -log_likelihood / batch_size

            avg_ll_loss.backward()

            torch.nn.utils.clip_grad_norm_(model.parameters(), args.clip_grad)
            optimizer.step()

            report_ll += log_likelihood.item()
            report_num_words += num_words
            report_num_sents += batch_size

            stop_avg_ll += log_likelihood.item()

            if train_iter % log_niter == 0:
                print('epoch %d, iter %d, ll_per_sent %.4f, ll_per_word %.4f, '
                      'max_var %.4f, min_var %.4f, time elapsed %.2f sec' %
                      (epoch, train_iter, report_ll / report_num_sents,
                       report_ll / report_num_words, model.var.data.max(),
                       model.var.data.min(), time.time() - begin_time),
                      file=sys.stderr)

            train_iter += 1
        if epoch % args.valid_nepoch == 0:
            with torch.no_grad():
                directed, undirected = model.test(test_deps, test_emb)
            print('\n\nacc on length <= 10: #trees %d, undir %2.1f, dir %2.1f\n\n'
                  % (len(test_deps), 100 * undirected, 100 * directed))

        stop_avg_ll = stop_avg_ll / stop_num_words
        rate = (stop_avg_ll - stop_avg_ll_last) / abs(stop_avg_ll_last)

        print('\n\nlikelihood: %.4f, likelihood last: %.4f, rate: %f\n' %
              (stop_avg_ll, stop_avg_ll_last, rate))

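        # Early stopping: halt once the relative per-word log-likelihood
        # improvement falls below 0.1%, after at least 5 epochs.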
        if rate < 0.001 and epoch >= 5:
            break

        stop_avg_ll_last = stop_avg_ll
        stop_avg_ll = stop_num_words = 0

    torch.save(model.state_dict(), args.save_path)

    # eval on all lengths
    if args.eval_all:
        test_sents, _ = read_conll(args.test_file)
        test_deps = [sent["head"] for sent in test_sents]
        test_emb = sents_to_vec(word_vec, test_sents)
        print("start evaluating on all lengths")
        with torch.no_grad():
            directed, undirected = model.test(test_deps,
                                              test_emb,
                                              eval_all=True)
        print('accuracy on all lengths: #trees %d, undir %2.1f, dir %2.1f'
              % (len(test_deps), 100 * undirected, 100 * directed))
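
The project-local helpers are not shown in these examples. As one illustration, a minimal `data_iter` consistent with the call sites above (batches of sentences, optional shuffling) might look like the following; it is a hypothetical reimplementation, not the original helper.

import random

def data_iter(data, batch_size, shuffle=False):
    # Hypothetical sketch: yield lists of up to `batch_size` sentences,
    # optionally visiting the data in shuffled order.
    indices = list(range(len(data)))
    if shuffle:
        random.shuffle(indices)
    for i in range(0, len(indices), batch_size):
        yield [data[j] for j in indices[i:i + batch_size]]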