示例#1
0
 def evaluate(self, node):
     """Return the fraction of verbs in ``node`` that read as present tense.

     The words of ``node`` are tagged with the tagger of the "TIGER" corpus
     obtained from ``Analyzer``. Every word whose tag starts with "V" counts
     as a verb. A verb counts as present tense when ``tenses`` reports more
     "present" readings than "past" readings for it.

     :param node: object providing ``words()``; its result is passed to the
         tagger
     :return: ``pres_verbs / total_verbs`` as a float, or ``0.0`` when no
         verb was found
     """
     A = Analyzer.instance()
     corp = A.get(corpus="TIGER")
     tagger = corp.tagger(True)
     tagged_words = tagger.tag(node.words())

     pres_verbs = 0
     total_verbs = 0
     for w in tagged_words:
         # w is indexed as (word, tag); untagged words have a falsy tag.
         if not (w[1] and w[1].startswith("V")):
             continue
         total_verbs += 1

         # The finite/infinitive verb tags alone (VVFIN, VAFIN, VVINF, VVIZU)
         # still include past-tense forms, so the tense is looked up per word
         # instead of being derived from the tag.
         # NOTE(review): assumes tenses() always returns an iterable of
         # indexable entries whose first item is the tense name - confirm.
         readings = [t[0] for t in tenses(w[0])]

         # An empty result yields 0 > 0, i.e. the verb is not counted as
         # present; no separate emptiness check is needed.
         if readings.count("present") > readings.count("past"):
             pres_verbs += 1

     if total_verbs > 0:
         return float(pres_verbs) / total_verbs
     return 0.0
示例#2
0
def create_graph(nlp: StanfordCoreNLP, sentence, debug=False):
    """
    Create a graph from a given sentence with a given StanfordCoreNLP instance.

    The sentence is parsed via ``nlp.parse``; the outermost noun phrases of the
    parse tree become ``Node`` candidates and are removed from the sentence
    subtree. Every leaf that is left over becomes an ``EdgeLinkVerb``. Nodes
    and edge links are then combined positionally: the i-th edge connects
    node i and node i + 1 through edge link i.

    :param nlp: StanfordCoreNLP instance; its ``parse`` and ``ner`` methods are
        called with the sentence
    :param sentence: Sentence to create a graph from
    :param debug: Debug mode; when true, intermediate results are printed
    :return: ``Graph`` built from the created nodes and edges
    """

    if debug:
        print(
            "-------------------------------------------------------------------------"
        )
        pprint(sentence)

    # -------------------------------------------------------------------------
    # Create NLTK parse tree of entire sentence
    parsed_output = nlp.parse(sentence)
    parse_tree = nltk.tree.Tree.fromstring(parsed_output)

    if debug:
        print("------------")
        print("Entire tree:")
        parse_tree.pretty_print()

    # -------------------------------------------------------------------------
    # Get all noun phrases - those are possible Node candidates
    noun_phrases = list(parse_tree.subtrees(phrase_filter))

    # There are no noun phrases, this sentence is a little too simple:
    # fall back to plain nouns.
    if not noun_phrases:
        noun_phrases = list(parse_tree.subtrees(noun_filter))

    if debug:
        print("------------")
        print("All noun phrase trees:")
        for noun_phrase in noun_phrases:
            noun_phrase.pretty_print()

    # -------------------------------------------------------------------------
    # Cleanups: Remove those that are nested in another noun phrase, we're
    # interested in the bigger one.
    #
    # The nested phrases are collected first and filtered out in one pass, so
    # the list is never modified while it is being iterated (deleting in place
    # skips the element that slides into the freed slot). They are tracked by
    # identity so that only the actual nested subtree objects are dropped, not
    # a separate phrase that merely compares equal to one of them.
    nested_ids = {
        id(sub_noun_phrase)
        for current_noun_phrase in noun_phrases
        for sub_noun_phrase in current_noun_phrase.subtrees(phrase_filter)
        if sub_noun_phrase is not current_noun_phrase  # Don't remove yourself
    }
    noun_phrases = [
        phrase for phrase in noun_phrases if id(phrase) not in nested_ids
    ]

    if debug:
        print("------------")
        print("All noun phrase trees after cleanup:")
        for noun_phrase in noun_phrases:
            noun_phrase.pretty_print()

    # -------------------------------------------------------------------------
    # Remove those sub trees from the parse tree, left over can be considered
    # links between those Nodes
    removable_tree = parse_tree[0]
    if removable_tree.label() != 'S':
        # The sentence node is expected one level further down.
        removable_tree = removable_tree[0]

    for noun_phrase in noun_phrases:
        try:
            removable_tree.remove(noun_phrase)
        except ValueError:
            pass  # Trying to remove a sub-tree that isn't there anymore - skip it

    # There was only one noun phrase, so the other one is likely just a noun
    if len(noun_phrases) == 1:
        nouns = list(parse_tree.subtrees(noun_filter))
        # Gotcha, that single noun is the other node candidate
        if len(nouns) == 1:
            noun_phrases.append(nouns[0])
            try:
                removable_tree.remove(nouns[0])
            except ValueError:
                pass  # In list, but not really, swallow

    # Remove end of sentence (i.e. $.), so it doesn't interfere with the rest
    eos_subtrees = list(parse_tree.subtrees(eos_filter))
    if eos_subtrees:
        try:
            removable_tree.remove(eos_subtrees[0])
        except ValueError:
            pass  # Seems to be in list, but not really, swallow this error.

    if debug:
        print("------------")
        print("Rest of the tree after removing all noun-phrases:")
        parse_tree.pretty_print()

    # -------------------------------------------------------------------------
    # Create nodes and edge links

    nodes = []
    edge_links = []
    edges = []

    # Try to figure out what kind of NE the possible nodes are
    ner_tagging = nlp.ner(sentence)

    if debug:
        print("------------")
        print("NER tagging for entire sentence:")
        pprint(ner_tagging)

    # Create nodes
    for noun_phrase in noun_phrases:
        # Create the word itself
        leaves = noun_phrase.leaves()
        node_word = " ".join(leaves)
        node_ne_tag = "O"
        node_numerus = SG

        # Try to find the corresponding NER; the last non-'O' tag whose token
        # occurs in this phrase wins.
        for current_tag in ner_tagging:
            if current_tag[1] != 'O' and current_tag[0] in leaves:
                node_ne_tag = current_tag[1]

        # Grab the first noun of the phrase
        nouns = list(noun_phrase.subtrees(noun_filter))
        if not nouns:
            # No nouns in this phrase, no nodes created.
            continue
        noun = " ".join(nouns[0].leaves())

        # Try to determine the gender
        node_gender = gender(noun)

        # Try to determine the numerus: a noun that pluralizes to itself and
        # is not a named entity is treated as plural.
        if pluralize(noun) == noun and node_ne_tag == 'O':
            node_numerus = PL

        nodes.append(Node(node_word, node_ne_tag, node_gender, node_numerus))

    if debug:
        print("------------")
        print("Nodes created:")
        for node in nodes:
            pprint(str(node))

    # Create edge links: one per leaf that is still in the parse tree
    for link_word in parse_tree.leaves():
        link_tense = None

        try:
            link_tenses = tenses(link_word)
            if len(link_tenses) > 0:
                link_tense = link_tenses[
                    0]  # Take first, this one is most likely
        except ValueError:
            pass  # Something in the intestines of pattern.de went wrong, swallow

        edge_links.append(EdgeLinkVerb(link_word, link_tense))

    if debug:
        print("------------")
        print("Edge links created:")
        for edge_link in edge_links:
            pprint(str(edge_link))

    # -------------------------------------------------------------------------
    # Stick nodes and edge links together: they form the graph

    # The edges are, within the sentence, between two nodes in the pattern of
    # "Node - Edge - Node". Therefore, the first node, together with the second
    # node and the first link forms one edge. zip stops at the shortest input,
    # i.e. when either the node pairs or the edge links run out.
    for first_node, second_node, edge_link in zip(nodes, nodes[1:], edge_links):
        edge = Edge(first_node, edge_link, second_node)

        first_node.attach_edge(edge)
        second_node.attach_edge(edge)

        edges.append(edge)

    if debug:
        # Header is printed once, like in the other debug sections.
        print("------------")
        print("Edges created:")
        for edge in edges:
            pprint(str(edge))

    # -------------------------------------------------------------------------
    # Create graph collection
    return Graph(nodes, edges)
示例#3
0
 def test_tenses(self):
     # Assert tense recognition: the (PRESENT, 3, SG) tuple must be among the
     # tenses reported for "ist", and "2sg" among those reported for "bist".
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn((de.PRESENT, 3, de.SG), de.tenses("ist"))
     self.assertIn("2sg", de.tenses("bist"))
     # Parenthesised single-argument form prints the same on Python 2 and 3.
     print("pattern.de.tenses()")
示例#4
0
 def test_tenses(self):
     # Assert tense of "ist": de.PRESENT_3RD_PERSON_SINGULAR must be among the
     # tenses reported for it, and "2sg" among those reported for "bist".
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn(de.PRESENT_3RD_PERSON_SINGULAR, de.tenses("ist"))
     self.assertIn("2sg", de.tenses("bist"))
     # Parenthesised single-argument form prints the same on Python 2 and 3.
     print("pattern.de.tenses()")
示例#5
0
 def test_tenses(self):
     # Assert tense of "ist": the (PRESENT, 3, SG) tuple must be among the
     # tenses reported for it, and "2sg" among those reported for "bist".
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn((de.PRESENT, 3, de.SG), de.tenses("ist"))
     self.assertIn("2sg", de.tenses("bist"))
     # Parenthesised single-argument form prints the same on Python 2 and 3.
     print("pattern.de.tenses()")
示例#6
0
文件: test_de.py 项目: iicc/pattern
 def test_tenses(self):
     # Assert tense recognition: the (PRESENT, 3, SG) tuple must be among the
     # tenses reported for "ist", and "2sg" among those reported for "bist".
     # assertIn reports both operands on failure, unlike assertTrue(x in y).
     self.assertIn((de.PRESENT, 3, de.SG), de.tenses("ist"))
     self.assertIn("2sg", de.tenses("bist"))
     print("pattern.de.tenses()")