# Imports assumed from the surrounding module; the module name for the
# kernel helpers (imported here as `k`) is a guess based on the calls below.
import json
import corenlp_xml_reader
import extract_features
import kernels as k
from nltk.corpus import wordnet, wordnet_ic


# Renamed from test_syntax_kernel: the original name collided with the
# syntactic-kernel test below, so this test would have been shadowed and
# never run.
def test_suffix_kernel(self):
    test_tokens = ['ceo', 'coach', 'manager', 'boss', 'brother', 'sister']
    feature_accumulator = extract_features.make_feature_accumulator(
        load='test/000')
    test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

    # Build a kernel that uses only the suffix feature, then compare its
    # output on all token pairs against directly computed suffix matches.
    suffix_multiplier = 2.0
    kernel = k.bind_kernel(
        features=feature_accumulator,
        syntax_feature_types=None,
        semantic_similarity=None,
        include_suffix=True,
        suffix_multiplier=suffix_multiplier
    )
    found_results = kernel(test_ids, test_ids)
    expected_results = self.get_expected_results(
        test_tokens,
        lambda x, y: suffix_multiplier * (
            feature_accumulator.get_suffix(x) is not None
            and feature_accumulator.get_suffix(x)
                == feature_accumulator.get_suffix(y)
        )
    )
    self.assertEqual(found_results, expected_results)

def test_semantic_kernel(self):
    information_content = wordnet_ic.ic('ic-treebank-resnik-add1.dat')
    test_tokens = ['ceo', 'coach', 'manager', 'boss', 'brother', 'sister']
    feature_accumulator = extract_features.make_feature_accumulator(
        load='test/000')
    test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

    # Test each of the supported semantic similarities: for every measure,
    # the kernel's output should match the maximum noun-synset similarity
    # computed directly from WordNet.
    for similarity_type in k.LEGAL_SIMILARITIES:
        kernel = k.bind_kernel(
            features=feature_accumulator,
            syntax_feature_types=None,
            semantic_similarity=similarity_type,
            semantic_multiplier=1.0
        )
        found_results = kernel(test_ids, test_ids)
        expected_results = self.get_expected_results(
            test_tokens,
            lambda x, y: 1.0 * k.max_similarity(
                similarity_type,
                k.nouns_only(wordnet.synsets(x)),
                k.nouns_only(wordnet.synsets(y)),
                information_content
            )
        )
        self.assertEqual(found_results, expected_results)

def test_syntax_kernel(self):
    test_tokens = ['ceo', 'coach', 'manager', 'boss', 'brother', 'sister']
    feature_accumulator = extract_features.make_feature_accumulator(
        load='test/000')
    test_ids = [[feature_accumulator.get_id(token)] for token in test_tokens]

    # The syntactic kernel should reduce to a dot product over the
    # accumulated syntactic feature dictionaries.
    syntax_feature_types = ['baseline', 'dependency', 'hand_picked']
    kernel = k.bind_kernel(
        features=feature_accumulator,
        syntax_feature_types=syntax_feature_types,
        semantic_similarity=None,
        syntactic_multiplier=1.0,
        semantic_multiplier=1.0,
    )
    found_results = kernel(test_ids, test_ids)
    expected_results = self.get_expected_results(
        test_tokens,
        lambda x, y: 1.0 * k.dict_dot(
            feature_accumulator.get_features(x, syntax_feature_types),
            feature_accumulator.get_features(y, syntax_feature_types)
        )
    )
    self.assertEqual(found_results, expected_results)
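
# The three kernel tests above rely on a `get_expected_results` helper that
# is not shown in this excerpt. A minimal sketch of what it plausibly does,
# assuming it builds the full pairwise Gram matrix by applying `func` to
# every ordered pair of tokens (illustrative only; the real helper may
# differ):
def get_expected_results(self, test_tokens, func):
    return [
        [func(token_a, token_b) for token_b in test_tokens]
        for token_a in test_tokens
    ]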

def test_load(self):
    feature_accumulator = extract_features.make_feature_accumulator()

    # Loading twice in a row should leave the accumulator matching the most
    # recently loaded directory.
    feature_accumulator.load('test/extracted1')
    self.assert_feature_like_on_disc(feature_accumulator, 'test/extracted1')
    feature_accumulator.load('test/extracted2')
    self.assert_feature_like_on_disc(feature_accumulator, 'test/extracted2')

def test_extract(self):
    article = corenlp_xml_reader.AnnotatedText(
        open('test/corenlp1.xml').read()
    )
    feature_accumulator = extract_features.make_feature_accumulator()
    feature_accumulator.extract(article)

    # Test that the features extracted are the ones expected
    self.assert_feature_like_on_disc(feature_accumulator, 'test/extracted1')

def test_merge_load(self):
    article = corenlp_xml_reader.AnnotatedText(
        open('test/corenlp1.xml').read()
    )
    feature_accumulator = extract_features.make_feature_accumulator()
    feature_accumulator.extract(article)

    # merge_load should combine the on-disc features with those already
    # extracted, rather than replacing them.
    feature_accumulator.merge_load('test/extracted2')
    self.assert_feature_like_on_disc(
        feature_accumulator, 'test/merged-extracted')
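
# The extraction tests call an `assert_feature_like_on_disc` helper that is
# also not shown here. One plausible sketch, assuming equality can be checked
# by loading a fresh accumulator from the given path and comparing per-token
# features (illustrative only; the real helper may compare the files
# directly):
def assert_feature_like_on_disc(self, feature_accumulator, path):
    on_disc = extract_features.make_feature_accumulator(load=path)
    feature_types = ['baseline', 'dependency', 'hand_picked']
    for token in feature_accumulator.dictionary.get_token_list():
        self.assertEqual(
            feature_accumulator.get_features(token, feature_types),
            on_disc.get_features(token, feature_types)
        )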

def test_extract_(self):
    # Extract features from two articles in sequence; the accumulator
    # should combine the features from both.  This writes the reference
    # fixture used by test_merge_load.
    feature_accumulator = extract_features.make_feature_accumulator()
    article = corenlp_xml_reader.AnnotatedText(
        open('test/corenlp1.xml').read()
    )
    feature_accumulator.extract(article)
    article = corenlp_xml_reader.AnnotatedText(
        open('test/corenlp2.xml').read()
    )
    feature_accumulator.extract(article)
    feature_accumulator.write('test/merged-extracted')

def test_normalized(self):
    feature_accumulator = extract_features.make_feature_accumulator()
    article = corenlp_xml_reader.AnnotatedText(
        open('test/corenlp1.xml').read()
    )
    feature_accumulator.extract(article)

    # Dump the normalized dependency features for every known token.
    normalized_features = {
        token: feature_accumulator.get_features(token, ['dependency'])
        for token in feature_accumulator.dictionary.get_token_list()
    }
    with open('test/normalized1.json', 'w') as normalized_file:
        normalized_file.write(json.dumps(normalized_features))
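
# For reference, the normalized features written above can be read back with
# json.load (a minimal usage sketch, not part of the test suite; the default
# path is the file produced by test_normalized):
def load_normalized_features(path='test/normalized1.json'):
    with open(path) as normalized_file:
        return json.load(normalized_file)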