示例#1
0
    def test_syntax_kernel(self):
        """
        Verify that a kernel bound with only the suffix feature enabled
        scores a token pair as suffix_multiplier exactly when both tokens
        have the same known (non-None) suffix, and 0 otherwise.
        """
        test_tokens = ['ceo','coach', 'manager','boss', 'brother','sister']
        feature_accumulator = extract_features.make_feature_accumulator(
            load='test/000')
        test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

        # Bind a kernel with suffix matching only -- no syntactic feature
        # types and no semantic similarity.
        suffix_multiplier = 2.0
        kernel = k.bind_kernel(
            features=feature_accumulator,
            syntax_feature_types=None,
            semantic_similarity=None,
            include_suffix=True,
            suffix_multiplier=suffix_multiplier
        )
        found_results = kernel(test_ids, test_ids)

        # Expected score: multiplier times an indicator that both tokens
        # share the same known suffix (Python bools act as 0/1 here).
        expected_results = self.get_expected_results(
            test_tokens,
            lambda x, y: suffix_multiplier * (
                feature_accumulator.get_suffix(x) is not None
                and feature_accumulator.get_suffix(x)
                == feature_accumulator.get_suffix(y)
            )
        )

        self.assertEqual(found_results, expected_results)
示例#2
0
    def test_semantic_kernel(self):
        """
        For every legal semantic similarity type, verify that the bound
        kernel reproduces the pairwise scores computed directly via
        WordNet's noun synsets.
        """
        information_content = wordnet_ic.ic('ic-treebank-resnik-add1.dat')
        test_tokens = ['ceo','coach', 'manager','boss', 'brother','sister']
        feature_accumulator = extract_features.make_feature_accumulator(
            load='test/000')
        test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

        # Test each of the semantic similarities
        for similarity_type in k.LEGAL_SIMILARITIES:

            kernel = k.bind_kernel(
                features=feature_accumulator,
                syntax_feature_types=None,
                semantic_similarity=similarity_type,
                semantic_multiplier=1.0
            )
            found_results = kernel(test_ids, test_ids)

            # Expected score: the best similarity between any noun senses
            # of the two tokens.  similarity_type is bound as a default
            # argument so the lambda does not depend on late binding of the
            # loop variable.
            expected_results = self.get_expected_results(
                test_tokens,
                lambda x, y, similarity_type=similarity_type: (
                    1.0 * k.max_similarity(
                        similarity_type,
                        k.nouns_only(wordnet.synsets(x)),
                        k.nouns_only(wordnet.synsets(y)),
                        information_content
                    )
                )
            )
            self.assertEqual(found_results, expected_results)
示例#3
0
    def test_syntax_kernel(self):
        """
        Verify that a kernel bound with syntactic feature types (and no
        semantic similarity) equals the dot product of each token pair's
        syntactic feature dictionaries.
        """
        test_tokens = ['ceo','coach', 'manager','boss', 'brother','sister']
        feature_accumulator = extract_features.make_feature_accumulator(
            load='test/000')
        test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

        syntax_feature_types = ['baseline', 'dependency', 'hand_picked']
        kernel = k.bind_kernel(
            features=feature_accumulator,
            syntax_feature_types=syntax_feature_types,
            semantic_similarity=None,
            syntactic_multiplier=1.0, semantic_multiplier=1.0,
        )

        found_results = kernel(test_ids, test_ids)

        # Expected score: plain dot product of the two tokens' feature
        # dictionaries over the selected syntactic feature types.
        expected_results = self.get_expected_results(
            test_tokens,
            lambda x, y: 1.0 * k.dict_dot(
                feature_accumulator.get_features(x, syntax_feature_types),
                feature_accumulator.get_features(y, syntax_feature_types)
            )
        )
        self.assertEqual(found_results, expected_results)
示例#4
0
    def test_load(self):
        """
        After loading a feature set from disc, the accumulator's contents
        should match that on-disc feature set.  Both fixtures are checked
        in turn on the same accumulator.
        """
        feature_accumulator = extract_features.make_feature_accumulator()
        for fixture_path in ('test/extracted1', 'test/extracted2'):
            feature_accumulator.load(fixture_path)
            self.assert_feature_like_on_disc(feature_accumulator, fixture_path)
示例#5
0
    def test_extract(self):
        """
        Extracting features from an annotated article should produce the
        reference feature set stored on disc.
        """
        with open('test/corenlp1.xml') as xml_file:
            article = corenlp_xml_reader.AnnotatedText(xml_file.read())

        feature_accumulator = extract_features.make_feature_accumulator()
        feature_accumulator.extract(article)

        # Compare against the expected feature set on disc.
        self.assert_feature_like_on_disc(feature_accumulator, 'test/extracted1')
示例#6
0
    def test_merge_load(self):
        """
        Extract features from one article, then merge-load a second feature
        set from disc; the result should match the pre-merged reference.
        """
        feature_accumulator = extract_features.make_feature_accumulator()

        with open('test/corenlp1.xml') as xml_file:
            article = corenlp_xml_reader.AnnotatedText(xml_file.read())
        feature_accumulator.extract(article)

        feature_accumulator.merge_load('test/extracted2')
        self.assert_feature_like_on_disc(
            feature_accumulator, 'test/merged-extracted')
示例#7
0
    def test_extract_(self):
        """
        Extract features from both corenlp articles into one accumulator
        and write the combined result to disc (produces the
        'test/merged-extracted' fixture).
        """
        feature_accumulator = extract_features.make_feature_accumulator()

        for xml_path in ('test/corenlp1.xml', 'test/corenlp2.xml'):
            with open(xml_path) as xml_file:
                article = corenlp_xml_reader.AnnotatedText(xml_file.read())
            feature_accumulator.extract(article)

        feature_accumulator.write('test/merged-extracted')
示例#8
0
    def test_normalized(self):
        """
        Extract features from an article, then dump the 'dependency'
        feature vectors for every token in the dictionary to a JSON file.
        """
        feature_accumulator = extract_features.make_feature_accumulator()
        with open('test/corenlp1.xml') as xml_file:
            article = corenlp_xml_reader.AnnotatedText(xml_file.read())
        feature_accumulator.extract(article)

        # Map every known token to its 'dependency' feature vector.
        normalized_features = {
            token: feature_accumulator.get_features(token, ['dependency'])
            for token in feature_accumulator.dictionary.get_token_list()
        }

        # Write via a context manager so the output file is reliably
        # flushed and closed (the original relied on refcounting to close
        # the handle).  json.dump streams directly to the file object.
        with open('test/normalized1.json', 'w') as out_file:
            json.dump(normalized_features, out_file)