def test_build_coccurrence_matrix():
    """
    Smoke-test co-occurrence matrix construction on the development data.

    Builds the matrix with ``min_frequency=2``, prints the tokenizer
    information for the corpus, then prints the resulting matrix.
    """
    dev_corpus = get_development_data()
    result = build_coccurrence_matrix(dev_corpus, min_frequency=2)
    cooccur, tokenizer = result

    print_tokenizer_information(tokenizer, dev_corpus)
    # Parenthesised single-argument print behaves the same as the
    # print statement used elsewhere in this file.
    print(cooccur)
def test_minibatch():
    """
    Smoke-test minibatch generation on the development data.

    Builds the co-occurrence matrix from the development corpus, requests
    batches with ``5`` as the second argument (presumably the batch size --
    confirm against ``get_cooccurrence_batches``) and prints the ``i``,
    ``j`` and count components of every batch.
    """
    dev_corpus = get_development_data()
    cooccurrence_matrix, tokenizer = build_coccurrence_matrix(dev_corpus)

    # Each batch unpacks into exactly three parts: i, j and the counts.
    for i, j, X_ij in get_cooccurrence_batches(cooccurrence_matrix, 5):
        print('i: {}'.format(i))
        print('j: {}'.format(j))
        print('count: {}'.format(X_ij))
def test_train():
    """
    Smoke-test training on the development data.

    Builds the co-occurrence matrix with ``min_frequency=2``, trains via
    ``build_graph_and_train`` using ``"dev_test"`` as the third argument
    (presumably the graph scope name -- confirm against the callee), and
    prints the element at index 1 of the returned embeddings.
    """
    # Build cooccurrence matrix
    dev_corpus = get_development_data()
    cooccurrence_matrix, tokenizer = build_coccurrence_matrix(
        dev_corpus, min_frequency=2)

    # Vocabulary size is the number of entries in the tokenizer's word index.
    word_index_keys = tokenizer.word_index.keys()
    vocab_size = len(word_index_keys)

    embeddings = build_graph_and_train(
        cooccurrence_matrix, vocab_size, "dev_test", tokenizer)

    print("Final embeddings:")
    print(embeddings[1])
def test_glove_model(scope):
    """Trains the model on the development data and prints the result.

    Builds the co-occurrence matrix from the development corpus, trains
    via ``build_graph_and_train`` and prints the shape of the returned
    embeddings followed by the element at index 0.

    Args:
        scope: variable name scope for the graph
    """
    dev_corpus = get_development_data()
    cooccurrence_matrix, tokenizer = build_coccurrence_matrix(dev_corpus)

    # Vocabulary size is the number of entries in the tokenizer's word index.
    word_index_keys = tokenizer.word_index.keys()
    vocab_size = len(word_index_keys)

    embeddings = build_graph_and_train(
        cooccurrence_matrix, vocab_size, scope, tokenizer)

    embeddings_shape = np.array(embeddings).shape
    print("Final embeddings shape {}:".format(embeddings_shape))
    print(embeddings[0])
def test_f():
    """
    Tests the function for preventing common word pairs

    Applies the weighting

        f(x) = (x / x_ij_max) ** alpha   if x < x_ij_max
        f(x) = 1.0                       otherwise

    element-wise to the counts of the first co-occurrence batch built from
    the development data, and prints both the counts and the weights.
    Only the first batch is evaluated; if no batch is produced nothing is
    printed.
    """
    x_ij_max = 100
    alpha = 0.75

    corpus = get_development_data()
    # The tokenizer is not needed for this check.
    cooccurrence_matrix, _tokenizer = build_coccurrence_matrix(corpus)
    minibatches = get_cooccurrence_batches(cooccurrence_matrix, 5)

    def weight(x_ij):
        # tf.cond takes callables for its branches, hence the lambdas.
        # The constant branch is cast to float64, so the counts are
        # presumably float64 as well -- confirm against the batch producer.
        return tf.cond(
            x_ij < x_ij_max,
            lambda: tf.pow(tf.divide(x_ij, x_ij_max), alpha),
            lambda: tf.cast(1.0, tf.float64))

    for _i, _j, X_ij in minibatches:
        print('count batch: {}'.format(X_ij))
        f = tf.map_fn(weight, X_ij)

        # The with-block closes the session on exit, so no explicit
        # sess.close() is required.
        with tf.Session() as sess:
            print('f: {}'.format(sess.run(f)))

        # just need to check for one batch
        return