def test_tree_is_apposition(self):
    """Check ``is_apposition`` on a fixed table of bracketed parse trees.

    Each table entry pairs the expected boolean with the bracketed string
    that is parsed into an ``nltk.ParentedTree`` and handed to
    ``mention_property_computer.is_apposition`` under the key
    ``"parse_tree"``. Entries are checked in order; the first mismatch
    fails the test.
    """
    expectations = (
        # "Secretary of State Madeleine Albright"
        (True,
         "(NP (NP (NP (NNP Secretary)) (PP (IN of) (NP "
         "(NNP State)))) (NP (NNP Madeleine) (NNP Albright)))"),
        # "Secretary of State"
        (False,
         "(NP (NP (NNP Secretary)) (PP (IN of) "
         "(NP (NNP State))))"),
        # "Al Gore 's campaign manager , Bill Daley ,"
        (False,
         "(NP (NP (NP (NNP Al) (NNP Gore) (POS 's)) (NN campaign) "
         "(NN manager)) (, ,) (NP (NNP Bill) (NNP Daley)) (, ,))"),
        # "news today"
        (False,
         "(NP (NP (NNS news)) (NP (CD today)))"),
        # "his brother , his sister"
        (False,
         "(NP (NP (PRP$ his) (NN brother)) (, ,) (NP (PRP$ his) "
         "(NN sister)))"),
        # "Barack Obama , the president"
        (True,
         "(NP (NP (NNP Barack) (NNP Obama)) (, ,) (NP (DT the) "
         "(NN president)))"),
    )

    for expected, bracketing in expectations:
        # Parse lazily, right before the check, one tree at a time.
        mention_attributes = {
            "parse_tree": nltk.ParentedTree.fromstring(bracketing)
        }
        self.assertEqual(
            expected,
            mention_property_computer.is_apposition(mention_attributes))
def test_tree_is_apposition(self):
    """Check ``is_apposition`` against six bracketed parse trees.

    Every bracketed string is turned into a tree with
    ``nltk_util.parse_parented_tree`` and wrapped in a dict under
    ``"parse_tree"`` before being passed to
    ``mention_property_computer.is_apposition``. The trees are evaluated
    one after another, in the order listed, and the first result that
    differs from its expected value fails the test.
    """
    def apposition_verdict(bracketing):
        # Build the minimal attribute dict that is_apposition is given here.
        tree = nltk_util.parse_parented_tree(bracketing)
        return mention_property_computer.is_apposition({"parse_tree": tree})

    bracketings = [
        "(NP (NP (NP (NNP Secretary)) (PP (IN of) (NP "
        "(NNP State)))) (NP (NNP Madeleine) (NNP Albright)))",

        "(NP (NP (NNP Secretary)) (PP (IN of) "
        "(NP (NNP State))))",

        "(NP (NP (NP (NNP Al) (NNP Gore) (POS 's)) (NN campaign) "
        "(NN manager)) (, ,) (NP (NNP Bill) (NNP Daley)) (, ,))",

        "(NP (NP (NNS news)) (NP (CD today)))",

        "(NP (NP (PRP$ his) (NN brother)) (, ,) (NP (PRP$ his) "
        "(NN sister)))",

        "(NP (NP (NNP Barack) (NNP Obama)) (, ,) (NP (DT the) "
        "(NN president)))",
    ]
    # Expected outcome for the tree at the same position in `bracketings`.
    expected_flags = [True, False, False, False, False, True]

    for wanted, bracketing in zip(expected_flags, bracketings):
        self.assertEqual(wanted, apposition_verdict(bracketing))
def from_document(span, document, first_in_gold_entity=False):
    """
    Build a mention for the given span of a document.

    The mention's attributes are derived from the linguistic annotation
    stored on the document (tokens, part-of-speech tags, named entity tags,
    speakers, coreference annotation and dependency trees) together with
    the helpers in ``mention_property_computer``. For information about
    the attributes, see the class documentation.

    Args:
        span (Span): The span of the mention in the document. Both
            ``span.begin`` and ``span.end`` are treated as inclusive
            token positions.
        document (CoNLLDocument): The document the mention belongs to.
        first_in_gold_entity (bool): Stored unchanged under the attribute
            of the same name. Defaults to False.

    Returns:
        Mention: A mention extracted from the input span in the input
            document.
    """
    sentence_id, sentence_span = document.get_sentence_id_and_span(span)

    # span.end is inclusive, hence the + 1 for slicing.
    covered = slice(span.begin, span.end + 1)

    attributes = {
        "tokens": document.tokens[covered],
        "pos": document.pos[covered],
        "ner": document.ner[covered],
        "sentence_id": sentence_id,
        "parse_tree": mention_property_computer.get_relevant_subtree(
            span, document),
        "speaker": document.speakers[span.begin],
        "antecedent": None,
        "set_id": None,
        "first_in_gold_entity": first_in_gold_entity
    }

    # Gold coreference set id, or None when the span is not annotated.
    attributes["annotated_set_id"] = (
        document.coref[span] if span in document.coref else None)

    # NOTE: the helpers below receive the attribute dict as filled so far,
    # so the order of these assignments must be kept.
    attributes["is_apposition"] = mention_property_computer.is_apposition(
        attributes)
    attributes["grammatical_function"] = (
        mention_property_computer.get_grammatical_function(attributes))

    head_tokens, head_offsets, head_index = (
        mention_property_computer.compute_head_information(attributes))

    attributes["head"] = head_tokens
    # The head offsets are relative to the mention; shift them by the
    # mention's start to obtain document positions.
    attributes["head_span"] = spans.Span(
        span.begin + head_offsets.begin,
        span.begin + head_offsets.end)
    attributes["head_index"] = head_index

    attributes["type"] = mention_property_computer.get_type(attributes)
    attributes["fine_type"] = mention_property_computer.get_fine_type(
        attributes)

    # A citation form is only computed for mentions of type "PRO".
    if attributes["type"] == "PRO":
        attributes["citation_form"] = (
            mention_property_computer.get_citation_form(attributes))

    attributes["number"] = mention_property_computer.compute_number(
        attributes)
    attributes["gender"] = mention_property_computer.compute_gender(
        attributes)
    attributes["semantic_class"] = (
        mention_property_computer.compute_semantic_class(attributes))

    attributes["head_as_lowercase_string"] = " ".join(
        attributes["head"]).lower()
    attributes["tokens_as_lowercase_string"] = " ".join(
        attributes["tokens"]).lower()

    # Position of the head token inside its sentence's dependency tree.
    sentence_dep_tree = document.dep[sentence_id]
    head_position = span.begin + head_index - sentence_span.begin

    # presumably `.head` is 1-based with 0 marking the root, so -1 after
    # the shift means "no governor" -- TODO confirm against the dep format
    governor_position = sentence_dep_tree[head_position].head - 1
    attributes["governor"] = (
        "NONE" if governor_position == -1
        else sentence_dep_tree[governor_position].form.lower())

    attributes["ancestry"] = Mention._get_ancestry(
        sentence_dep_tree, head_position)
    attributes["deprel"] = sentence_dep_tree[head_position].deprel

    return Mention(document, span, attributes)