Пример #1
0
def test_build_coccurrence_matrix():
    """Build the co-occurrence matrix for the development corpus and print it.

    Calls ``build_coccurrence_matrix`` with ``min_frequency=2``, reports the
    returned tokenizer through ``print_tokenizer_information`` and finally
    prints the matrix. Nothing is asserted; the output is meant to be
    inspected by hand.
    """
    dev_corpus = get_development_data()
    built = build_coccurrence_matrix(dev_corpus, min_frequency=2)
    matrix, fitted_tokenizer = built
    print_tokenizer_information(fitted_tokenizer, dev_corpus)
    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(matrix)
Пример #2
0
def test_minibatch():
    """Print every minibatch produced from the development corpus.

    Builds the co-occurrence matrix with default settings, asks
    ``get_cooccurrence_batches`` for batches (second argument 5, presumably
    the batch size -- confirm against that helper) and prints the three
    components of each batch.
    """
    dev_corpus = get_development_data()
    # The tokenizer returned alongside the matrix is not needed here.
    matrix, _ = build_coccurrence_matrix(dev_corpus)
    for idx_i, idx_j, count_vals in get_cooccurrence_batches(matrix, 5):
        print('i:       {}'.format(idx_i))
        print('j:       {}'.format(idx_j))
        print('count:   {}'.format(count_vals))
Пример #3
0
def test_train():
    """Run training on the development corpus and print the result.

    The co-occurrence matrix is built with ``min_frequency=2``; the
    vocabulary size is taken from the number of entries in the tokenizer's
    ``word_index``. Training runs through ``build_graph_and_train`` under the
    ``"dev_test"`` scope, and element 1 of the returned embeddings is printed.
    """
    dev_corpus = get_development_data()

    # Build cooccurrence matrix
    matrix, fitted_tokenizer = build_coccurrence_matrix(
        dev_corpus, min_frequency=2)

    word_index = fitted_tokenizer.word_index
    n_words = len(word_index.keys())

    trained = build_graph_and_train(
        matrix, n_words, "dev_test", fitted_tokenizer)

    print("Final embeddings:")
    print(trained[1])
Пример #4
0
def test_glove_model(scope):
    """Train on the development corpus under ``scope`` and print the result.

    Builds the co-occurrence matrix with default settings, trains through
    ``build_graph_and_train`` and prints the shape of the returned embeddings
    (as seen by ``np.array``) followed by their first element.

    Args:
        scope: variable name scope for the graph
    """
    dev_corpus = get_development_data()
    matrix, fitted_tokenizer = build_coccurrence_matrix(dev_corpus)

    word_index = fitted_tokenizer.word_index
    n_words = len(word_index.keys())

    trained = build_graph_and_train(matrix, n_words, scope, fitted_tokenizer)

    trained_shape = np.array(trained).shape
    print("Final embeddings shape {}:".format(trained_shape))
    print(trained[0])
Пример #5
0
def test_f():
    """Print the co-occurrence weighting for the first minibatch.

    For every count ``x`` in the batch the weight is
    ``(x / x_ij_max) ** alpha`` when ``x < x_ij_max`` and ``1.0`` otherwise,
    with ``x_ij_max = 100`` and ``alpha = 0.75``. The weights are evaluated
    in a TensorFlow session and printed together with the raw counts. Only
    the first batch is processed; if there are no batches nothing is printed.
    """
    x_ij_max = 100
    alpha = 0.75

    def weight(x_ij):
        # Both tf.cond branches must yield the same dtype; the constant
        # branch is cast to float64 (assumes the counts are float64 --
        # TODO confirm against get_cooccurrence_batches).
        return tf.cond(
            x_ij < x_ij_max,
            lambda: tf.pow(tf.divide(x_ij, x_ij_max), alpha),
            lambda: tf.cast(1.0, tf.float64))

    corpus = get_development_data()
    # The tokenizer returned alongside the matrix is not needed here.
    cooccurrence_matrix, _ = build_coccurrence_matrix(corpus)
    minibatches = get_cooccurrence_batches(cooccurrence_matrix, 5)
    for _, _, X_ij in minibatches:
        print('count batch: {}'.format(X_ij))
        f = tf.map_fn(weight, X_ij)
        # The context manager closes the session on exit, so no explicit
        # sess.close() is required.
        with tf.Session() as sess:
            print('f: {}'.format(sess.run(f)))
        # just need to check for one batch
        return