Exemplo n.º 1
0
def start_denotation(opts):
    """Train an RNNRTE model on the denotation data, checkpointing every epoch.

    ``opts`` is the option object of the training script. The fields read here
    are ``dataset``, ``data``, ``wvecDim``, ``outputDim``, ``minibatch``,
    ``step``, ``optimizer``, ``epochs`` and ``outFile``. As a side effect
    ``opts.numWords`` is set to the size of the dataset's word map.

    After each epoch ``opts.outFile`` is overwritten with, in order: the
    pickled ``opts``, the pickled ``sgd.costt`` and the model parameters as
    written by ``rnn.toFile``.
    """
    print("Loading data...")
    opts.numWords = len(tr.loadWordMap(opts.dataset))
    print("vocab size: %d" % opts.numWords)

    print("Loading word2vec vectors...")
    # The pre-built word matrix is the first element of the pickled payload.
    # A ``with`` block closes the handle deterministically instead of leaving
    # it to the garbage collector.
    with open("mr_%s.p" % opts.dataset, "rb") as matrix_file:
        W = pickle.load(matrix_file)[0]

    # The third argument is hard-coded to 200 -- presumably an embedding
    # size; confirm against RNNRTE's constructor.
    rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
    rnn.initParams(W)
    sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer)

    last_epoch = opts.epochs - 1
    for e in range(opts.epochs):
        epoch_began = time.time()
        print("Running epoch %d" % e)
        # The lines are re-read every epoch, exactly as before.
        sgd.run_denotation(tr.get_lines(opts.dataset, opts.data))
        print("Time per epoch : %f" % (time.time() - epoch_began))

        # NOTE(review): the checkpoint is opened in text mode ("w"); pickle
        # streams are normally written in binary mode. Left unchanged so the
        # file stays readable by the existing text-mode reader.
        with open(opts.outFile, "w") as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)

            # debug: only the final epoch passes the extra ``True`` flag to
            # toFile; its meaning is defined by RNNRTE.toFile.
            if e == last_epoch:
                rnn.toFile(fid, True)
            else:
                rnn.toFile(fid)
Exemplo n.º 2
0
def test_denotation(netFile, data, dataset):
    """Evaluate a saved RNNRTE model and print its cost and accuracy.

    Args:
        netFile: Path of the checkpoint to load. Two pickled objects are read
            from it (the training options, then a value that is discarded)
            before the remainder is handed to ``rnn.fromFile``.
        data: Data split forwarded to ``tr.get_lines``. When ``None`` the
            split stored in the checkpoint's options (``opts.data``) is used.
        dataset: Dataset name. Selects the word matrix ``mr_<dataset>.p`` and
            is forwarded to the ``tr`` helpers.

    Raises:
        AssertionError: If ``netFile`` is ``None``.
    """
    assert netFile is not None, "Must give model to test"

    # NOTE(review): opened in text mode to match how the checkpoint is
    # written; pickle streams are normally read in binary mode.
    with open(netFile, "r") as fid:
        opts = pickle.load(fid)
        pickle.load(fid)  # skip the second pickled object (not needed here)

        # The word matrix is the first element of the pickled payload; the
        # ``with`` block makes sure the handle is closed.
        with open("mr_%s.p" % dataset, "rb") as matrix_file:
            W = pickle.load(matrix_file)[0]

        # Third argument hard-coded to 200 -- presumably an embedding size;
        # confirm against RNNRTE's constructor.
        rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
        rnn.initParams(W)
        rnn.fromFile(fid)

    # Evaluate on the split the caller asked for. Previously the ``data``
    # argument was ignored and the split recorded at training time was used.
    split = opts.data if data is None else data
    lines = tr.get_lines(dataset, split)

    CHUNK_SIZE = 10000
    print("Testing...")

    cost = correct = total = 0
    # Step through *all* lines, including a final chunk shorter than
    # CHUNK_SIZE, so no examples are silently left out of the score.
    for offset in range(0, len(lines), CHUNK_SIZE):
        # Parse the chunk into trees, then map word indices onto them.
        trees = list(tr.inputarray(lines[offset:offset + CHUNK_SIZE], dataset))
        tr.map_words_to_trees(trees, dataset)

        chunk_cost, chunk_correct, chunk_total = rnn.costAndGrad(trees, test=True)
        cost += chunk_cost
        correct += chunk_correct
        total += chunk_total
        print("tested: %d" % offset)

    # With no scored examples there is no meaningful accuracy; report NaN
    # rather than dividing by zero.
    accuracy = correct / float(total) if total else float("nan")
    print("Cost %f, Correct %d/%d, Acc %f" % (cost, correct, total, accuracy))
Exemplo n.º 3
0
def start(opts):
    """Train an RNNRTE model, optionally together with a denotation model.

    ``opts`` is the option object of the training script. The fields read here
    are ``dataset``, ``data``, ``wvecDim``, ``outputDim``, ``minibatch``,
    ``step``, ``optimizer``, ``use_denotation``, ``epochs`` and ``outFile``,
    plus ``dg_dataset`` when ``use_denotation`` is non-zero. As a side effect
    ``opts.numWords`` is set to the size of the dataset's word map.

    When ``opts.use_denotation == 0`` the model is trained with ``sgd.run``
    on the loaded trees. Otherwise a second RNNRTE model is built from the
    ``dg_dataset`` word matrix, handed to the optimizer as ``model_dg``, and
    each epoch calls ``sgd.run_using_denotation``.

    After each epoch ``opts.outFile`` is overwritten with, in order: the
    pickled ``opts``, the pickled ``sgd.costt`` and the parameters of the
    main model as written by ``rnn.toFile``.
    """
    print("Loading data...")
    trees, _vocab = tr.loadTrees(opts.dataset, opts.data)  # e.g. sick, train_parsed
    opts.numWords = len(tr.loadWordMap(opts.dataset))

    print("Loading word2vec vectors...")
    # The pre-built word matrix is the first element of the pickled payload.
    # A ``with`` block closes the handle deterministically.
    with open("mr_%s.p" % opts.dataset, "rb") as matrix_file:
        W = pickle.load(matrix_file)[0]

    # Random word vectors for experiments: pass W2 instead of W to initParams.
    # It is unused below but kept so the NumPy RNG is advanced exactly as
    # before (initParams may draw from it -- TODO confirm).
    W2 = 0.01 * np.random.randn(opts.wvecDim, opts.numWords)

    plain_training = opts.use_denotation == 0

    # Third argument (embeddingDim) is fixed at 200 for now.
    rnn = nnet_rte.RNNRTE(opts.wvecDim, opts.outputDim, 200, opts.numWords, opts.minibatch)
    if plain_training:
        rnn.initParams(W)
        sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer)
    else:
        # A previous approach, restoring a pre-trained denotation model from
        # models/denotation_sample.bin via rnn.from_file_denotation, was
        # disabled here. The code below instead builds a second model from
        # the dg_dataset word matrix and passes it to the optimizer.
        rnn.initParams(W, True)

        with open("mr_%s.p" % opts.dg_dataset, "rb") as dg_matrix_file:
            W_dg = pickle.load(dg_matrix_file)[0]
        # The output dimension of the second model is hard-coded to 2.
        rnn_dg = nnet_rte.RNNRTE(opts.wvecDim, 2, 200, opts.numWords, opts.minibatch)
        rnn_dg.initParams(W_dg, True)

        sgd = optimizer.SGD(rnn, alpha=opts.step, minibatch=opts.minibatch, optimizer=opts.optimizer, model_dg=rnn_dg)

    last_epoch = opts.epochs - 1
    for e in range(opts.epochs):
        # Named epoch_began so the local does not shadow this function's name.
        epoch_began = time.time()
        print("Running epoch %d" % e)
        if plain_training:
            sgd.run(trees)
        else:
            # The denotation lines are re-read every epoch, exactly as before.
            lines = tr.get_lines(opts.dg_dataset, opts.data)
            sgd.run_using_denotation(trees, lines)
        print("Time per epoch : %f" % (time.time() - epoch_began))

        # NOTE(review): the checkpoint is opened in text mode ("w"); pickle
        # streams are normally written in binary mode. Left unchanged so the
        # file stays readable by the existing text-mode reader.
        with open(opts.outFile, "w") as fid:
            pickle.dump(opts, fid)
            pickle.dump(sgd.costt, fid)

            # debug: only the final epoch passes the extra ``True`` flag to
            # toFile; its meaning is defined by RNNRTE.toFile.
            if e == last_epoch:
                rnn.toFile(fid, True)
            else:
                rnn.toFile(fid)