# Build the word-prediction network. The call returns the output layer plus
# four callables, all of which are handed to LasagneNet below.
output_layer, train_func, test_func, predict_func, get_hidden_func = word_prediction_network(BATCH_SIZE, word_embedding_size, num_words, MAX_SEQ_LEN, WEIGHTS, NUM_UNITS_GRU, learning_rate)

    # Wrap the layer and callables in an estimator; SaveParams is registered as
    # an on_epoch_finished callback with save_interval = 1.
    # NOTE(review): SaveParams is given 'save_params' here, while the else-branch
    # below loads from 'saved_params' -- confirm the two names are meant to differ.
    estimator = LasagneNet(output_layer, train_func, test_func, predict_func, get_hidden_func, on_epoch_finished=[SaveParams('save_params','word_embedding', save_interval = 1)])
    # estimator.draw_network() # requires networkx package

    # Split the data: examples [0, train_split) are used for training and
    # [train_split, test_split) for testing. Each X is a dict pairing the
    # encoded sequences with their masks.
    X_train = {'X': encoded_sequences[:train_split], 'X_mask': masks[:train_split]}
    y_train = y[:train_split]
    X_test = {'X': encoded_sequences[train_split:test_split], 'X_mask': masks[train_split:test_split]}
    y_test = y[train_split:test_split]
    
    # Hard-coded switch: with False, training is skipped and the evaluation
    # branch below runs instead.
    train = False
    if train:
        estimator.fit(X_train, y_train)
    else:
        estimator.load_weights_from('saved_params')
        # Invert the vocabulary so that its values look up its keys.
        word2vec_vocab_rev = dict(zip(word2vec_vocab.values(), word2vec_vocab.keys()))  # Maps indices to words.

        predictions = estimator.predict(X_test)
        # One row per sample, num_words + 1 columns.
        predictions = predictions.reshape(-1, num_words + 1)  # Reshape into #samples x #words.
        n_pred = predictions.shape[0]
        for row in range(n_pred):
            # Sample a word from output probabilities.
            # A single multinomial draw yields a one-hot vector; argmax recovers
            # the sampled index. Assumes each row is a valid probability
            # vector -- TODO confirm.
            sample = np.random.multinomial(n=1, pvals=predictions[row,:]).argmax()
            line = X_test['X'][row]  # Get the input line.
            # Boolean indexing keeps only the positions where the mask is nonzero.
            line = line[X_test['X_mask'][row].astype('bool')]  # Apply mask.
            print 'Input: %r' %([word2vec_vocab_rev[w] for w in line])  # Print words in line.
            print 'Guess:: %r' %(word2vec_vocab_rev[sample])  # Print predicted word.
            print 'Output: %r' %(word2vec_vocab_rev[y_test[row]])  # Print the correct word.
        # Drop into the interactive debugger once every test row has been printed.
        pdb.set_trace()
    )

    # Wrap the output layer and the train/test/predict callables in an
    # estimator; SaveParams is registered as an on_epoch_finished callback
    # with save_interval=1.
    estimator = LasagneNet(
        output_layer,
        train_func,
        test_func,
        predict_func,
        on_epoch_finished=[SaveParams("save_params", "word_embedding", save_interval=1)],
    )
    # estimator.draw_network() # requires networkx package

    # Hard-coded switch: with True, the estimator is fitted and the prediction
    # branch below is skipped.
    train = True
    if train:
        estimator.fit(X_train, y_train)
    else:
        estimator.load_weights_from("word_embedding/saved_params_3")
        # Not appended to within the visible lines; presumably collects one
        # predicted sentence per test example further down -- TODO confirm.
        pred_sents = []
        # For each test example, predict the response.
        for idx in xrange(X_test["X"].shape[0]):
            pred_words = []

            # Copy row idx of the inputs into a fresh (1, MAX_SEQ_LEN) array:
            # empty_like allocates, the slice assignment fills it. Presumably
            # done so later edits do not touch X_test -- TODO confirm.
            temp = X_test["X"][idx].reshape(1, MAX_SEQ_LEN)
            X_new = np.empty_like(temp)
            X_new[:] = temp

            # Same independent copy for the corresponding mask row.
            temp = X_test["X_mask"][idx].reshape(1, MAX_SEQ_LEN)
            X_mask_new = np.empty_like(temp)
            X_mask_new[:] = temp
            # Predict one word at a time, based on the previous predicted words and the query.
            for pred_iter in range(10):  # FIXME: predicting 10 words. To stop predicting, make the model predict <EOS>.
                # Flag reset at the start of every iteration; where it is set
                # and read lies beyond the visible lines.
                zero_found = False