Пример #1
0
 def test_qaranker_local_integration(self):
     """Run the QA-ranker pipeline locally on the fixture data under self.qa_path.

     Reads the relations and the question corpus, builds both the pairwise
     and the list-wise TextSets from them, checks the sample shapes, then
     trains a TimeDistributed KNRM for two epochs and prints its NDCG@3 and
     MAP on the list-wise set.
     """
     text_len = 5
     sample_shape = [2, 2 * text_len]

     rels = Relations.read(self.qa_path + "/relations.txt")
     assert len(rels) == 4

     corpus = TextSet.read_csv(self.qa_path + "/question_corpus.csv")
     assert corpus.get_uris() == ["Q1", "Q2"]

     # Same preprocessing chain as a single fluent call, spelled out step by step.
     tokenized = corpus.tokenize()
     normalized = tokenized.normalize()
     indexed = normalized.word2idx()
     shaped = indexed.shape_sequence(text_len)

     # The same transformed corpus is passed for both text arguments.
     pair_set = TextSet.from_relation_pairs(rels, shaped, shaped)
     pairs = pair_set.get_samples()
     assert len(pairs) == 2
     expected_pair_label = np.array([[1.0], [0.0]])
     for pair in pairs:
         assert list(pair.feature.shape) == sample_shape
         assert np.allclose(pair.label.to_ndarray(), expected_pair_label)

     list_set = TextSet.from_relation_lists(rels, shaped, shaped)
     listed = list_set.get_samples()
     assert len(listed) == 2
     for item in listed:
         assert list(item.feature.shape) == sample_shape
         assert list(item.label.shape) == [2, 1]

     ranker = KNRM(text_len, text_len, self.glove_path,
                   word_index=shaped.get_word_index())
     wrapped = TimeDistributed(ranker, input_shape=(2, 2 * text_len))
     net = Sequential().add(wrapped)
     net.compile("sgd", "rank_hinge")
     net.fit(pair_set, batch_size=2, nb_epoch=2)

     ndcg_at_3 = ranker.evaluate_ndcg(list_set, 3)
     print(ndcg_at_3)
     mean_avg_precision = ranker.evaluate_map(list_set)
     print(mean_avg_precision)
Пример #2
0
 def test_with_keras(self):
     """Compare KNRM's forward output with the model built by self.keras_knrm.

     The reference model's weights are copied into KNRM (with the second
     weight array transposed) so that both models should produce the same
     output for the same random integer batch.
     """
     reference = self.keras_knrm(5, 10, 22, 50)
     batch = np.random.randint(20, size=(4, 15))

     # The reference model takes the first 5 columns and the remaining
     # columns of the batch as two separate inputs.
     first_part = batch[:, :5]
     second_part = batch[:, 5:]
     expected = reference.predict([first_part, second_part])

     ref_weights = reference.get_weights()
     converted = [
         ref_weights[0],
         np.transpose(ref_weights[1]),
         ref_weights[2],
     ]

     knrm = KNRM(5, 10, glove_path)
     knrm.set_weights(converted)
     actual = knrm.forward(batch)
     self.assert_allclose(actual, expected)
Пример #3
0
 def test_forward_backward(self):
     """Run the forward/backward check on a KNRM built from random embedding weights."""
     # The same two numbers give the shape of the random weight matrix and
     # are passed as KNRM's third and fourth positional arguments.
     n_rows, n_cols = 40, 20
     text1_len, text2_len = 15, 60

     embedding = np.random.random([n_rows, n_cols])
     kernel_options = {
         "kernel_num": 10,
         "sigma": 0.15,
         "exact_sigma": 1e-4,
     }
     knrm = KNRM(text1_len, text2_len, n_rows, n_cols,
                 embed_weights=embedding, **kernel_options)

     # One row of integer ids in [0, n_rows), as wide as both lengths combined.
     batch = np.random.randint(n_rows, size=(1, text1_len + text2_len))
     self.assert_forward_backward(knrm, batch)
Пример #4
0
 def test_save_load(self):
     """Run the zoo-model save/load check on a KNRM with a random integer batch."""
     knrm = KNRM(5, 10, glove_path)
     # Three rows of 15 integer ids drawn from [0, 20).
     batch = np.random.randint(20, size=(3, 15))
     self.assert_zoo_model_save_load(knrm, batch)
Пример #5
0
 def test_forward_backward(self):
     """Run the forward/backward check on a KNRM built with a four-word index."""
     vocabulary = {"is": 1, "to": 2, "the": 3, "for": 4}
     knrm = KNRM(
         15,
         60,
         glove_path,
         word_index=vocabulary,
         kernel_num=10,
         sigma=0.15,
         exact_sigma=1e-4,
     )
     # One row of 75 integer ids drawn from [0, 5).
     batch = np.random.randint(5, size=(1, 75))
     self.assert_forward_backward(knrm, batch)
Пример #6
0
                             sc, int(options.partition_num)).tokenize().normalize()\
        .word2idx(min_freq=2).shape_sequence(int(options.question_length))
    # Answer corpus: read the CSV, tokenize, normalize, index words with
    # min_freq=2 while passing the question set's word index as existing_map,
    # then call shape_sequence with answer_length.
    a_set = TextSet.read_csv(options.data_path+"/answer_corpus.csv",
                             sc, int(options.partition_num)).tokenize().normalize()\
        .word2idx(min_freq=2, existing_map=q_set.get_word_index())\
        .shape_sequence(int(options.answer_length))

    # Training data is built as relation pairs, validation data as relation
    # lists; both combine the question set and the answer set.
    train_relations = Relations.read(options.data_path + "/relation_train.csv",
                                     sc, int(options.partition_num))
    train_set = TextSet.from_relation_pairs(train_relations, q_set, a_set)
    validate_relations = Relations.read(options.data_path + "/relation_valid.csv",
                                        sc, int(options.partition_num))
    validate_set = TextSet.from_relation_lists(validate_relations, q_set, a_set)

    # Reuse a saved KNRM when options.model is set; otherwise build a new one
    # from the embedding file and the answer set's word index.
    if options.model:
        knrm = KNRM.load_model(options.model)
    else:
        # NOTE(review): a_set's index was built with q_set's index as
        # existing_map, so it presumably covers both corpora -- confirm.
        word_index = a_set.get_word_index()
        knrm = KNRM(int(options.question_length), int(options.answer_length),
                    options.embedding_file, word_index)
    # Wrap KNRM in TimeDistributed; the declared input shape is 2 rows of
    # question_length + answer_length entries.
    model = Sequential().add(
        TimeDistributed(
            knrm,
            input_shape=(2, int(options.question_length) + int(options.answer_length))))
    model.compile(optimizer=SGD(learningrate=float(options.learning_rate)),
                  loss="rank_hinge")
    # Train one epoch per iteration so the validation metrics (NDCG with 3
    # and 5, then MAP) are evaluated after every epoch.
    # NOTE(review): the return values of evaluate_* are discarded here;
    # presumably the metrics are reported by the calls themselves -- confirm.
    for i in range(0, int(options.nb_epoch)):
        model.fit(train_set, batch_size=int(options.batch_size), nb_epoch=1)
        knrm.evaluate_ndcg(validate_set, 3)
        knrm.evaluate_ndcg(validate_set, 5)
        knrm.evaluate_map(validate_set)