Пример #1
0
 def dep_parse(self, sentence):
     #Lazy-initialize the depparser
     if self.depparser is None:
         from nltk.parse import MaltParser
         self.depparser = MaltParser(tagger=self.get_pos_tagger())
     if not self.depparser._trained:
         self.train_depparser()
     return [self.depparser.parse(sentence, verbose=self.verbose)]
Пример #2
0
    def dep_parse(self, sentence='every cat leaves'):
        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()

        tokens = sentence.split()
        return [self.depparser.parse(tokens, verbose=self.verbose)]
Пример #3
0
    def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose)
Пример #4
0
 def dep_parse(self, sentence):
     #Lazy-initialize the depparser
     if self.depparser is None:
         from nltk.parse import MaltParser
         self.depparser = MaltParser(tagger=self.get_pos_tagger())
     if not self.depparser._trained:
         self.train_depparser()
     return [self.depparser.parse(sentence, verbose=self.verbose)]
Пример #5
0
    def dep_parse(self, sentence='every cat leaves'):
        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()

        tokens = sentence.split()
        return [self.depparser.parse(tokens, verbose=self.verbose)]
Пример #6
0
    def _init_glue(self):
        tagger = RegexpTagger([
            ('^(David|Mary|John)$', 'NNP'),
            ('^(walks|sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$',
             'VB'), ('^(go|order|vanish|find|approach)$', 'VB'),
            ('^(a)$', 'ex_quant'), ('^(every)$', 'univ_quant'),
            ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
            ('^(big|gray|former)$', 'JJ'), ('^(him|himself)$', 'PRP')
        ])

        depparser = MaltParser(tagger=tagger)
        self._glue = DrtGlue(depparser=depparser, remove_duplicates=False)
Пример #7
0
def demo(show_example=-1):
    from nltk.parse import MaltParser

    examples = [
        "David sees Mary",
        "David eats a sandwich",
        "every man chases a dog",
        "every man believes a dog sleeps",
        "John gives David a sandwich",
        "John chases himself",
    ]
    #                'John persuades David to order a pizza',
    #                'John tries to go',
    #                'John tries to find a unicorn',
    #                'John seems to vanish',
    #                'a unicorn seems to approach',
    #                'every big cat leaves',
    #                'every gray cat leaves',
    #                'every big gray cat leaves',
    #                'a former senator leaves',

    print("============== DEMO ==============")

    tagger = RegexpTagger(
        [
            ("^(David|Mary|John)$", "NNP"),
            (
                "^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$",
                "VB",
            ),
            ("^(go|order|vanish|find|approach)$", "VB"),
            ("^(a)$", "ex_quant"),
            ("^(every)$", "univ_quant"),
            ("^(sandwich|man|dog|pizza|unicorn|cat|senator)$", "NN"),
            ("^(big|gray|former)$", "JJ"),
            ("^(him|himself)$", "PRP"),
        ]
    )

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for (i, sentence) in enumerate(examples):
        if i == show_example or show_example == -1:
            print(f"[[[Example {i}]]]  {sentence}")
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print("")
Пример #8
0
def demo(show_example=-1):
    from nltk.parse import MaltParser

    examples = [
        'David sees Mary',
        'David eats a sandwich',
        'every man chases a dog',
        'every man believes a dog sleeps',
        'John gives David a sandwich',
        'John chases himself',
    ]
    #                'John persuades David to order a pizza',
    #                'John tries to go',
    #                'John tries to find a unicorn',
    #                'John seems to vanish',
    #                'a unicorn seems to approach',
    #                'every big cat leaves',
    #                'every gray cat leaves',
    #                'every big gray cat leaves',
    #                'a former senator leaves',

    print('============== DEMO ==============')

    tagger = RegexpTagger(
        [
            ('^(David|Mary|John)$', 'NNP'),
            (
                '^(sees|eats|chases|believes|gives|sleeps|chases|persuades|tries|seems|leaves)$',
                'VB',
            ),
            ('^(go|order|vanish|find|approach)$', 'VB'),
            ('^(a)$', 'ex_quant'),
            ('^(every)$', 'univ_quant'),
            ('^(sandwich|man|dog|pizza|unicorn|cat|senator)$', 'NN'),
            ('^(big|gray|former)$', 'JJ'),
            ('^(him|himself)$', 'PRP'),
        ]
    )

    depparser = MaltParser(tagger=tagger)
    glue = Glue(depparser=depparser, verbose=False)

    for (i, sentence) in enumerate(examples):
        if i == show_example or show_example == -1:
            print('[[[Example %s]]]  %s' % (i, sentence))
            for reading in glue.parse_to_meaning(sentence.split()):
                print(reading.simplify())
            print('')
Пример #9
0
    def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose)
Пример #10
0
class Glue:
    def __init__(
        self, semtype_file=None, remove_duplicates=False, depparser=None, verbose=False
    ):
        self.verbose = verbose
        self.remove_duplicates = remove_duplicates
        self.depparser = depparser

        from nltk import Prover9

        self.prover = Prover9()

        if semtype_file:
            self.semtype_file = semtype_file
        else:
            self.semtype_file = os.path.join(
                "grammars", "sample_grammars", "glue.semtype"
            )

    def train_depparser(self, depgraphs=None):
        if depgraphs:
            self.depparser.train(depgraphs)
        else:
            self.depparser.train_from_file(
                nltk.data.find(
                    os.path.join("grammars", "sample_grammars", "glue_train.conll")
                )
            )

    def parse_to_meaning(self, sentence):
        readings = []
        for agenda in self.parse_to_compiled(sentence):
            readings.extend(self.get_readings(agenda))
        return readings

    def get_readings(self, agenda):
        readings = []
        agenda_length = len(agenda)
        atomics = dict()
        nonatomics = dict()
        while agenda:  # is not empty
            cur = agenda.pop()
            glue_simp = cur.glue.simplify()
            if isinstance(
                glue_simp, linearlogic.ImpExpression
            ):  # if cur.glue is non-atomic
                for key in atomics:
                    try:
                        if isinstance(cur.glue, linearlogic.ApplicationExpression):
                            bindings = cur.glue.bindings
                        else:
                            bindings = linearlogic.BindingDict()
                        glue_simp.antecedent.unify(key, bindings)
                        for atomic in atomics[key]:
                            if not (
                                cur.indices & atomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(cur.applyto(atomic))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                    except linearlogic.UnificationException:
                        pass
                try:
                    nonatomics[glue_simp.antecedent].append(cur)
                except KeyError:
                    nonatomics[glue_simp.antecedent] = [cur]

            else:  # else cur.glue is atomic
                for key in nonatomics:
                    for nonatomic in nonatomics[key]:
                        try:
                            if isinstance(
                                nonatomic.glue, linearlogic.ApplicationExpression
                            ):
                                bindings = nonatomic.glue.bindings
                            else:
                                bindings = linearlogic.BindingDict()
                            glue_simp.unify(key, bindings)
                            if not (
                                cur.indices & nonatomic.indices
                            ):  # if the sets of indices are disjoint
                                try:
                                    agenda.append(nonatomic.applyto(cur))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                        except linearlogic.UnificationException:
                            pass
                try:
                    atomics[glue_simp].append(cur)
                except KeyError:
                    atomics[glue_simp] = [cur]

        for entry in atomics:
            for gf in atomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        for entry in nonatomics:
            for gf in nonatomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        return readings

    def _add_to_reading_list(self, glueformula, reading_list):
        add_reading = True
        if self.remove_duplicates:
            for reading in reading_list:
                try:
                    if reading.equiv(glueformula.meaning, self.prover):
                        add_reading = False
                        break
                except Exception as e:
                    # if there is an exception, the syntax of the formula
                    # may not be understandable by the prover, so don't
                    # throw out the reading.
                    print("Error when checking logical equality of statements", e)

        if add_reading:
            reading_list.append(glueformula.meaning)

    def parse_to_compiled(self, sentence):
        gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
        return [self.gfl_to_compiled(gfl) for gfl in gfls]

    def dep_parse(self, sentence):
        """
        Return a dependency graph for the sentence.

        :param sentence: the sentence to be parsed
        :type sentence: list(str)
        :rtype: DependencyGraph
        """

        # Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser

            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return self.depparser.parse(sentence, verbose=self.verbose)

    def depgraph_to_glue(self, depgraph):
        return self.get_glue_dict().to_glueformula_list(depgraph)

    def get_glue_dict(self):
        return GlueDict(self.semtype_file)

    def gfl_to_compiled(self, gfl):
        index_counter = Counter()
        return_list = []
        for gf in gfl:
            return_list.extend(gf.compile(index_counter))

        if self.verbose:
            print("Compiled Glue Premises:")
            for cgf in return_list:
                print(cgf)

        return return_list

    def get_pos_tagger(self):
        from nltk.corpus import brown

        regexp_tagger = RegexpTagger(
            [
                (r"^-?[0-9]+(\.[0-9]+)?$", "CD"),  # cardinal numbers
                (r"(The|the|A|a|An|an)$", "AT"),  # articles
                (r".*able$", "JJ"),  # adjectives
                (r".*ness$", "NN"),  # nouns formed from adjectives
                (r".*ly$", "RB"),  # adverbs
                (r".*s$", "NNS"),  # plural nouns
                (r".*ing$", "VBG"),  # gerunds
                (r".*ed$", "VBD"),  # past tense verbs
                (r".*", "NN"),  # nouns (default)
            ]
        )
        brown_train = brown.tagged_sents(categories="news")
        unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
        bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
        trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)

        # Override particular words
        main_tagger = RegexpTagger(
            [(r"(A|a|An|an)$", "ex_quant"), (r"(Every|every|All|all)$", "univ_quant")],
            backoff=trigram_tagger,
        )

        return main_tagger
Пример #11
0
class Glue(object):
    def __init__(self, semtype_file=None, remove_duplicates=False,
                 depparser=None, verbose=False):
        self.verbose = verbose
        self.remove_duplicates = remove_duplicates
        self.depparser = depparser

        from nltk import Prover9
        self.prover = Prover9()

        if semtype_file:
            self.semtype_file = semtype_file
        else:
            self.semtype_file = os.path.join('grammars', 'sample_grammars','glue.semtype')

    def train_depparser(self, depgraphs=None):
        if depgraphs:
            self.depparser.train(depgraphs)
        else:
            self.depparser.train_from_file(nltk.data.find(
                os.path.join('grammars', 'sample_grammars',
                             'glue_train.conll')))

    def parse_to_meaning(self, sentence):
        readings = []
        for agenda in self.parse_to_compiled(sentence):
            readings.extend(self.get_readings(agenda))
        return readings

    def get_readings(self, agenda):
        readings = []
        agenda_length = len(agenda)
        atomics = dict()
        nonatomics = dict()
        while agenda: # is not empty
            cur = agenda.pop()
            glue_simp = cur.glue.simplify()
            if isinstance(glue_simp, linearlogic.ImpExpression): # if cur.glue is non-atomic
                for key in atomics:
                    try:
                        if isinstance(cur.glue, linearlogic.ApplicationExpression):
                            bindings = cur.glue.bindings
                        else:
                            bindings = linearlogic.BindingDict()
                        glue_simp.antecedent.unify(key, bindings)
                        for atomic in atomics[key]:
                            if not (cur.indices & atomic.indices): # if the sets of indices are disjoint
                                try:
                                    agenda.append(cur.applyto(atomic))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                    except linearlogic.UnificationException:
                        pass
                try:
                    nonatomics[glue_simp.antecedent].append(cur)
                except KeyError:
                    nonatomics[glue_simp.antecedent] = [cur]

            else: # else cur.glue is atomic
                for key in nonatomics:
                    for nonatomic in nonatomics[key]:
                        try:
                            if isinstance(nonatomic.glue, linearlogic.ApplicationExpression):
                                bindings = nonatomic.glue.bindings
                            else:
                                bindings = linearlogic.BindingDict()
                            glue_simp.unify(key, bindings)
                            if not (cur.indices & nonatomic.indices): # if the sets of indices are disjoint
                                try:
                                    agenda.append(nonatomic.applyto(cur))
                                except linearlogic.LinearLogicApplicationException:
                                    pass
                        except linearlogic.UnificationException:
                            pass
                try:
                    atomics[glue_simp].append(cur)
                except KeyError:
                    atomics[glue_simp] = [cur]

        for entry in atomics:
            for gf in atomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        for entry in nonatomics:
            for gf in nonatomics[entry]:
                if len(gf.indices) == agenda_length:
                    self._add_to_reading_list(gf, readings)
        return readings

    def _add_to_reading_list(self, glueformula, reading_list):
        add_reading = True
        if self.remove_duplicates:
            for reading in reading_list:
                try:
                    if reading.equiv(glueformula.meaning, self.prover):
                        add_reading = False
                        break
                except Exception as e:
                    #if there is an exception, the syntax of the formula
                    #may not be understandable by the prover, so don't
                    #throw out the reading.
                    print('Error when checking logical equality of statements', e)
                    pass
        if add_reading:
            reading_list.append(glueformula.meaning)

    def parse_to_compiled(self, sentence):
        gfls = [self.depgraph_to_glue(dg) for dg in self.dep_parse(sentence)]
        return [self.gfl_to_compiled(gfl) for gfl in gfls]

    def dep_parse(self, sentence):
        #Lazy-initialize the depparser
        if self.depparser is None:
            from nltk.parse import MaltParser
            self.depparser = MaltParser(tagger=self.get_pos_tagger())
        if not self.depparser._trained:
            self.train_depparser()
        return [self.depparser.parse(sentence, verbose=self.verbose)]

    def depgraph_to_glue(self, depgraph):
        return self.get_glue_dict().to_glueformula_list(depgraph)

    def get_glue_dict(self):
        return GlueDict(self.semtype_file)

    def gfl_to_compiled(self, gfl):
        index_counter = Counter()
        return_list = []
        for gf in gfl:
            return_list.extend(gf.compile(index_counter))

        if self.verbose:
            print('Compiled Glue Premises:')
            for cgf in return_list:
                print(cgf)

        return return_list

    def get_pos_tagger(self):
        regexp_tagger = RegexpTagger(
            [(r'^-?[0-9]+(.[0-9]+)?$', 'CD'),   # cardinal numbers
             (r'(The|the|A|a|An|an)$', 'AT'),   # articles
             (r'.*able$', 'JJ'),                # adjectives
             (r'.*ness$', 'NN'),                # nouns formed from adjectives
             (r'.*ly$', 'RB'),                  # adverbs
             (r'.*s$', 'NNS'),                  # plural nouns
             (r'.*ing$', 'VBG'),                # gerunds
             (r'.*ed$', 'VBD'),                 # past tense verbs
             (r'.*', 'NN')                      # nouns (default)
        ])
        brown_train = brown.tagged_sents(categories='news')
        unigram_tagger = UnigramTagger(brown_train, backoff=regexp_tagger)
        bigram_tagger = BigramTagger(brown_train, backoff=unigram_tagger)
        trigram_tagger = TrigramTagger(brown_train, backoff=bigram_tagger)

        #Override particular words
        main_tagger = RegexpTagger(
            [(r'(A|a|An|an)$', 'ex_quant'),
             (r'(Every|every|All|all)$', 'univ_quant')
        ], backoff=trigram_tagger)

        return main_tagger