Пример #1
0
 def add_new_vocab_rule(self, rule):
     """
     Registers one more vocabulary rule and rebuilds the grammar and
     the parser so that they include it.

     rule is indexed as rule[0] (the left-hand-side symbol name, which
     is wrapped in NT) and rule[1] (the right-hand side, passed to
     Production unchanged).
     """
     lhs_name, rhs = rule[0], rule[1]
     self.rules.append(Production(NT(lhs_name), rhs))

     # self.cfg and self.parser are derived from self.rules, so both
     # are rebuilt from scratch with "S" as the start symbol.
     grammar = ContextFreeGrammar(NT("S"), self.rules)
     self.cfg = grammar
     self.parser = EarleyChartParser(grammar, trace=0)
Пример #2
0
def find_after_verb(parse):
    """
    Depth-first search for the first subtree whose top production has
    "After_Verb_Tr" or "After_Verb_In" on its left-hand side.

    Returns that subtree, or None when parse is a plain string (a
    leaf) or when no such subtree is found.
    """
    if isinstance(parse, str):
        return None

    head = parse.productions()[0].lhs()
    wanted = ("After_Verb_Tr", "After_Verb_In")
    if any(head == NT(label) for label in wanted):
        return parse

    # No match at this node: hand back the first truthy result found
    # among the children, in order.
    for child in parse:
        found = find_after_verb(child)
        if found:
            return found

    return None
Пример #3
0
def find_compound_noun(parse):
    """
    Walks the parse depth-first and returns the first subtree whose
    top production is headed by "CompoundNoun" or "CompoundNoun_Pl".

    Returns None for a string leaf or when nothing matches.
    """
    if isinstance(parse, str):
        return None

    head = parse.productions()[0].lhs()
    is_compound = head == NT("CompoundNoun") or head == NT("CompoundNoun_Pl")
    if is_compound:
        return parse

    # Otherwise the answer, if any, lies in one of the children.
    for child in parse:
        hit = find_compound_noun(child)
        if hit:
            return hit

    return None
Пример #4
0
    def __init__(self, rules_file="rules.gr", vocab_file="vocabulary.gr"):
        """
        Reads in grammar rules (from rules_file) and vocab rules (from
        vocab_file) and creates self.cfg (a ContextFreeGrammar) and
        self.parser (a EarleyChartParser).

        Both files are read line by line.  Blank lines and lines whose
        first non-blank character is "#" are skipped.  For every other
        line the first two characters are dropped, and the remainder is
        split at the first tab into a left-hand side and a
        space-separated right-hand side.

        In rules_file every right-hand-side item becomes a nonterminal;
        in vocab_file the right-hand side is lower-cased and its items
        are kept as plain strings.
        """
        self.rules = []

        def entries(path):
            """Yield (lhs, rhs_text) for each rule line found in path."""
            # "with" guarantees the file is closed even if a line
            # cannot be processed.
            with open(path, "r") as handle:
                for line in handle:
                    stripped = line.strip()
                    if stripped == "" or stripped.startswith("#"):
                        continue
                    # Drop the first two characters of the raw line
                    # (presumably a one-character field plus its
                    # separator -- confirm against the .gr format).
                    parts = line[2:].partition("\t")
                    yield parts[0].strip(), parts[2].strip()

        # get the rules from rules_file
        for lhs, rhs_text in entries(rules_file):
            rhs = [NT(x) for x in rhs_text.split(" ")]
            self.rules.append(Production(NT(lhs), rhs))

        # get the rules from vocab_file
        for lhs, rhs_text in entries(vocab_file):
            rhs = rhs_text.lower().split(" ")
            self.rules.append(Production(NT(lhs), rhs))

        # create the grammar and parser
        self.cfg = ContextFreeGrammar(NT("S"), self.rules)
        self.parser = EarleyChartParser(self.cfg, trace=0)
Пример #5
0
def find_PP(parse):
    """
    Locates the first prepositional phrase ("PP") in the parse,
    searching depth-first.

    Note that the value returned is the LAST child of that PP subtree
    (parse[-1]), not the PP subtree itself.  Returns None for a string
    leaf or when the parse contains no PP.
    """
    if isinstance(parse, str):
        return None

    if parse.productions()[0].lhs() == NT("PP"):
        return parse[-1]

    # Lazily search the children in order and stop at the first hit.
    for hit in (find_PP(child) for child in parse):
        if hit:
            return hit

    return None
Пример #6
0
def get_sentence_type(parse):
    """
    Classifies the sentence by looking, depth-first, for the first
    clause-level production in the tree.

    Returns QUESTION for an "Ind_Clause_Ques" / "Ind_Clause_Ques_Aux"
    node.  For an "Ind_Clause" / "Ind_Clause_Pl" node it returns
    COMMAND when the first right-hand-side symbol is "VP_Inf" and
    STATEMENT otherwise.  Returns 0 for a string leaf or when no such
    node is found.
    """
    if isinstance(parse, str):
        return 0

    top = parse.productions()[0]
    head = top.lhs()

    if any(head == NT(name)
           for name in ("Ind_Clause_Ques", "Ind_Clause_Ques_Aux")):
        return QUESTION

    if any(head == NT(name) for name in ("Ind_Clause", "Ind_Clause_Pl")):
        # A clause that opens with an infinitive verb phrase is a command.
        if top.rhs()[0] == NT("VP_Inf"):
            return COMMAND
        return STATEMENT

    # Not a clause node: the first truthy result from a child wins.
    for child in parse:
        found = get_sentence_type(child)
        if found:
            return found

    return 0
Пример #7
0
    def parse_NP(self, sen):
        """
        Parses a partial sentence (usually a noun phrase) with a
        temporary grammar that uses "NP" as its start symbol instead
        of "S".

        sen is stripped and split on single spaces before parsing.

        Returns the first parse found.  When building the grammar or
        parsing raises, the traceback is printed; in that case, or when
        no parse is found, "failure" is printed and None is returned.
        """
        try:
            cfg_temp = ContextFreeGrammar(NT("NP"), self.rules)
            parser_temp = EarleyChartParser(cfg_temp, trace=0)
            parse = parser_temp.nbest_parse(sen.strip().split(" "), trace=0)
        except Exception:
            # Deliberately best-effort, but only for ordinary errors:
            # KeyboardInterrupt and SystemExit must still propagate.
            print(traceback.format_exc())
        else:
            if parse:
                return parse[0]

        print("failure")
        return None
Пример #8
0
    def rand_sent(self, symbol=None):
        """
        Creates a random sentence from self.cfg.

        symbol is the nonterminal to expand.  Callers normally omit it,
        in which case expansion starts from NT("S"); it is supplied by
        the recursive calls that expand the nonterminals on the chosen
        production's right-hand side.

        Each chosen production is printed.  Returns the generated words
        joined by single spaces, or None when a nonterminal that has to
        be expanded has no productions (that symbol is printed).
        """
        if symbol is None:
            symbol = NT("S")

        poss = self.cfg.productions(lhs=symbol)
        if not poss:
            # Nothing can expand this symbol: report it and give up.
            print(symbol)
            return None

        chosen = random.choice(poss)
        print(chosen)

        sen = []
        for nt in chosen.rhs():
            if isinstance(nt, NT):
                words = self.rand_sent(nt)
                if words is None:
                    # Propagate the failure instead of letting
                    # " ".join() choke on a None entry.
                    return None
                sen.append(words)
            else:
                sen.append(nt)

        return " ".join(sen)
Пример #9
0
def find_noun(parse, exceptions=None):
    """
    Finds the first noun-like subtree in the parse, searching
    depth-first.

    A subtree qualifies when the left-hand side of its top production
    is one of the noun labels listed below AND its leaves, joined by
    single spaces, are not in exceptions.  A subtree that is rejected
    only because of exceptions is still searched, so a noun nested
    inside it can be returned.

    exceptions is an optional container of strings to skip; omitting
    it (or passing None) skips nothing.

    Returns the matching subtree, or None for a string leaf or when
    nothing qualifies.
    """
    if isinstance(parse, str):
        return None
    if exceptions is None:
        # Avoid a shared mutable default argument.
        exceptions = ()

    noun_labels = (
        "NP", "NP_1st", "NP_2nd", "NP_3rd", "NP_1st_Pl", "NP_3rd_Pl",
        "NP_Obj", "Name", "Place", "Program", "Org", "Field",
        "Nominal", "Command", "File_Addr", "Web_Addr", "CompoundNoun",
        "Noun", "Noun_Pl", "Nominal_Pl",
    )

    head = parse.productions()[0].lhs()
    if any(head == NT(label) for label in noun_labels) and \
       " ".join(parse.leaves()) not in exceptions:
        return parse

    for subtree in parse:
        n = find_noun(subtree, exceptions)
        if n:
            return n

    return None
Пример #10
0
def _after_verb_or_pp(node):
    """Return find_after_verb(node) if truthy, otherwise find_PP(node)."""
    found = find_after_verb(node)
    if not found:
        found = find_PP(node)
    return found


def find_topic(parse, type=None, qword=None):
    """
    Finds the topic of a sentence, based on the sentence type:
    either QUESTION, STATEMENT, or COMMAND.

    parse is a parse tree (or a string leaf, for which None is
    returned).  type is the sentence type; when it is None it is
    determined with get_sentence_type(parse).  qword is the question
    word, if already known; otherwise it is taken from the first leaf
    of the "Ind_Clause_Ques*" node.

    Return value:
      - at an "Ind_Clause_Ques*" node of a QUESTION whose last
        right-hand-side symbol is one of the handled cases, a tuple
        (topic, qword), where topic may be None;
      - for STATEMENT and COMMAND, the topic subtree or None;
      - None when nothing is found.

    Debug information is printed along the way.
    """

    # find the sentence type if it's not specified
    if type is None:
        type = get_sentence_type(parse)
    if isinstance(parse, str):
        return None
    tree = parse.productions()[0]
    # Single-argument print produces the same output on Python 2 and 3.
    print("%s - tree: %s" % (type, tree))

    lhs = tree.lhs()

    # for questions
    if type == QUESTION:

        # All questions start with the "Ind_Clause_Ques*" structure.
        # After that, there are several possibilities:
        #    - VP_3rd
        #    - Ind_Clause_Ques_Aux
        #    - Interrog_Clause
        #    - Ind_Clause_Inf*
        # Depending on which one of these comes next, keep searching
        # for the topic.
        if lhs == NT("Ind_Clause_Ques") or \
           lhs == NT("Ind_Clause_Ques_Aux"):
            if not qword:
                qword = parse[0].leaves()[0]
                print("qword: %s" % (qword,))

            last = tree.rhs()[-1]
            if last == NT("VP_3rd"):
                print("VP_3rd")
                return _after_verb_or_pp(parse[-1][-1]), qword

            # this acts just like a statement, so call find_topic
            # again, but specifying the type=STATEMENT
            elif last == NT("Ind_Clause_Ques_Aux"):
                print("Ind_Clause_Ques_Aux")
                return find_topic(parse[-1][-1], type=STATEMENT), qword

            elif last == NT("Interrog_Clause"):
                print("Interrog_Clause")
                return _after_verb_or_pp(parse[-1][-1]), qword

            # this acts just like a statement, so call find_topic
            # again, but specifying the type=STATEMENT
            elif last == NT("Ind_Clause_Inf") or \
                 last == NT("Ind_Clause_Inf_3rd"):
                print("Ind_Clause_Inf")
                return find_topic(parse[-1], type=STATEMENT), qword
        else:
            for subtree in parse:
                subj = find_topic(subtree, type)
                if subj:
                    return subj

    # for statements
    elif type == STATEMENT:
        if lhs == NT("VP_1st") or \
           lhs == NT("VP_Inf"):
            return _after_verb_or_pp(parse[-1][-1])
        else:
            for subtree in parse:
                subj = find_topic(subtree, type)
                if subj:
                    return subj

    # for commands
    elif type == COMMAND:
        if lhs == NT("VP_Inf"):
            # a trailing PP is the topic itself; anything else is
            # searched for an "After_Verb_*" structure
            if tree.rhs()[-1] == NT("PP"):
                return parse[-1]
            return find_after_verb(parse)

        elif lhs == NT("PP"):
            return parse[-1]
        else:
            for subtree in parse:
                subj = find_topic(subtree, type)
                if subj:
                    return subj

    return None