示例#1
0
    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse ``text`` and run every parse through ``read_sentence``.

        The parser and the disambiguation helper are created the first time
        this method runs and are kept on the instance afterwards.

        Args:
            text: text handed to ``self.parser.parse_text``.
            aux_text: optional extra text; when truthy it is appended to
                ``text`` (separated by a newline) in ``self.aux_text``.
            reset_context: when true, ``self.aux_text`` is overwritten;
                otherwise it is left as it was.

        Returns:
            A list of ``(first_item, sentence_output)`` tuples, one per
            element returned by the parser, where ``first_item`` is element
            0 of the parser's item and ``sentence_output`` is what
            ``read_sentence`` returned for element 1.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)

        nlp_parses = self.parser.parse_text(text)

        if reset_context:
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)
            else:
                self.aux_text = text

        parses = []
        for item in nlp_parses:
            head = item[0]
            sentence_output = self.read_sentence(Sentence(item[1]))
            parses.append((head, sentence_output))

        # Log the edges attached to every sentence output.
        for _, sentence_output in parses:
            self.debug_msg('== extra ==')
            for edge in sentence_output.edges:
                self.debug_msg(ed.edge2str(edge))

        return parses
示例#2
0
 def read_text(self, text):
     """Parse ``text`` and return one ``read_sentence`` result per parse.

     The parser is created on first use and stored on ``self.parser``.
     Each element returned by ``parse_text`` is wrapped in ``Sentence``
     before being handed to ``read_sentence``.
     """
     if self.parser is None:
         self.debug_msg('creating parser...')
         self.parser = Parser()

     results = []
     for parsed in self.parser.parse_text(text):
         results.append(self.read_sentence(Sentence(parsed)))
     return results
示例#3
0
class Extractor(object):
    """Run parsed sentences through a configurable sequence of named stages.

    The first configured stage consumes the sentence itself; every later
    stage is built from the previous stage's output. ``create_stage`` maps
    stage names to stage objects.
    """

    def __init__(self,
                 hg,
                 stages=('alpha-forest', 'beta-naive', 'gamma', 'delta',
                         'epsilon'),
                 show_namespaces=False):
        """Store the configuration; the parser is created lazily.

        Args:
            hg: object handed to ``Disambiguation`` and to the 'beta' stage.
            stages: ordered stage names understood by ``create_stage``.
            show_namespaces: forwarded as ``with_namespaces`` when a stage
                output is rendered as a string.
        """
        self.hg = hg
        self.stages = stages
        # Parser and disambiguation helper are created by read_text.
        self.parser = None
        self.disamb = None
        # When True, debug_msg prints in addition to logging.
        self.debug = False
        # String rendering of each stage output for the last sentence read.
        self.outputs = []
        # Context text forwarded to the 'beta' stage.
        self.aux_text = ''
        self.show_namespaces = show_namespaces

    def create_stage(self, name, output):
        """Build the stage object registered under ``name``.

        Args:
            name: one of 'alpha-forest', 'beta', 'beta-simple',
                'beta-naive', 'gamma', 'delta' or 'epsilon'.
            output: output of the previous stage; ignored by 'alpha-forest'.

        Raises:
            RuntimeError: if ``name`` is not a known stage name.
        """
        if name == 'alpha-forest':
            return AlphaForest()
        if name == 'beta':
            return BetaStage(self.hg, self.parser, self.disamb, output,
                             self.aux_text)
        if name == 'beta-simple':
            return BetaStageSimple(output)
        if name == 'beta-naive':
            return BetaStageNaive(output)
        if name == 'gamma':
            return GammaStage(output)
        if name == 'delta':
            return DeltaStage(output)
        if name == 'epsilon':
            return EpsilonStage(output)
        raise RuntimeError('unknown stage name: %s' % name)

    def debug_msg(self, msg):
        """Log ``msg`` at INFO level and also print it when debugging."""
        logging.info(msg)
        if self.debug:
            print(msg)

    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse ``text`` and run every parse through ``read_sentence``.

        Args:
            text: text handed to the parser.
            aux_text: optional extra text; when truthy it is appended to
                ``text`` (newline-separated) in ``self.aux_text``.
            reset_context: when true, ``self.aux_text`` is overwritten;
                otherwise it is left untouched.

        Returns:
            A list of ``(first_item, sentence_output)`` tuples, one per
            element returned by the parser.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)
        nlp_parses = self.parser.parse_text(text)

        if reset_context:
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)
            else:
                self.aux_text = text

        parses = [(p[0], self.read_sentence(Sentence(p[1])))
                  for p in nlp_parses]

        # Log the edges attached to every sentence output.
        for _, sentence_output in parses:
            self.debug_msg('== extra ==')
            for edg in sentence_output.edges:
                self.debug_msg(ed.edge2str(edg))

        return parses

    def read_sentence(self, sentence):
        """Run ``sentence`` through all configured stages, in order.

        The string rendering of every stage output is collected in
        ``self.outputs``. The final output gets a ``main_edge`` attribute
        built from its tree.

        Returns:
            The output object of the last stage.

        Raises:
            RuntimeError: if no stage is configured, or a stage name is
                unknown.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        self.outputs = []

        last_stage_output = None
        for position, name in enumerate(self.stages):
            stage = self.create_stage(name, last_stage_output)
            self.debug_msg('executing %s stage...' % name)
            if position == 0:
                # Only the first stage sees the sentence; later stages were
                # constructed from the previous output.
                last_stage_output = stage.process_sentence(sentence)
            else:
                last_stage_output = stage.process()
            output = last_stage_output.tree.to_hyperedge_str(
                with_namespaces=self.show_namespaces)
            self.outputs.append(output)
            self.debug_msg(output)

        if last_stage_output is None:
            # Without this guard an empty stage list fails below with an
            # opaque AttributeError on None.
            raise RuntimeError('cannot read sentence: no stages configured')

        last_stage_output.main_edge = last_stage_output.tree.to_hyperedge()
        return last_stage_output
示例#4
0
class Extractor(object):
    """Run a sentence through the alpha, beta, gamma, delta and epsilon stages.

    Every stage has a configurable type name; ``'default'`` is the only
    type the ``create_*_stage`` factories recognise. The string form of
    each stage result is kept in the matching ``*_output`` attribute.
    """

    def __init__(self, hg, alpha='default', beta='default', gamma='default', delta='default', epsilon='default'):
        """Store the stage configuration; the parser is created lazily.

        Args:
            hg: object handed to the beta stage.
            alpha, beta, gamma, delta, epsilon: type name of each stage.
        """
        self.hg = hg
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.delta = delta
        self.epsilon = epsilon
        # Created by read_text on first use.
        self.parser = None
        # When True, debug_msg prints its argument.
        self.debug = False
        # str() of each stage result for the last sentence read.
        self.alpha_output = None
        self.beta_output = None
        self.gamma_output = None
        self.delta_output = None
        self.epsilon_output = None

    def debug_msg(self, msg):
        """Print ``msg`` when debugging is enabled."""
        if self.debug:
            print(msg)

    def _require_default(self, stage_name, stage_type):
        """Raise RuntimeError unless ``stage_type`` is the supported type."""
        if stage_type == 'default':
            return
        raise RuntimeError('unknown %s stage type: %s'
                           % (stage_name, stage_type))

    def _record_output(self, stage_name, tree):
        """Store and log the string form of ``tree`` for ``stage_name``."""
        rendered = str(tree)
        setattr(self, '%s_output' % stage_name, rendered)
        self.debug_msg(rendered)

    def create_alpha_stage(self):
        """Build the alpha stage; raises RuntimeError for unknown types."""
        self._require_default('alpha', self.alpha)
        return AlphaStage()

    def create_beta_stage(self, tree):
        """Build the beta stage over ``tree``; raises RuntimeError for unknown types."""
        self._require_default('beta', self.beta)
        return BetaStage(self.hg, tree)

    def create_gamma_stage(self, tree):
        """Build the gamma stage over ``tree``; raises RuntimeError for unknown types."""
        self._require_default('gamma', self.gamma)
        return GammaStage(tree)

    def create_delta_stage(self, tree):
        """Build the delta stage over ``tree``; raises RuntimeError for unknown types."""
        self._require_default('delta', self.delta)
        return DeltaStage(tree)

    def create_epsilon_stage(self, tree):
        """Build the epsilon stage over ``tree``; raises RuntimeError for unknown types."""
        self._require_default('epsilon', self.epsilon)
        return EpsilonStage(tree)

    def read_text(self, text):
        """Parse ``text`` and return one ``read_sentence`` result per parse.

        The parser is created on first use and kept on ``self.parser``.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
        sents = self.parser.parse_text(text)
        return [self.read_sentence(Sentence(sent)) for sent in sents]

    def read_sentence(self, sentence):
        """Run ``sentence`` through the five stages and return the last result.

        The alpha stage consumes the sentence; each later stage is created
        from the previous stage's result. After every stage the matching
        ``*_output`` attribute is updated with ``str(result)``.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        alpha_stage = self.create_alpha_stage()
        self.debug_msg('executing alpha stage...')
        tree = alpha_stage.process_sentence(sentence)
        self._record_output('alpha', tree)

        # The remaining stages share one shape: build from the previous
        # tree, process, record.
        later_stages = (
            ('beta', self.create_beta_stage),
            ('gamma', self.create_gamma_stage),
            ('delta', self.create_delta_stage),
            ('epsilon', self.create_epsilon_stage),
        )
        for stage_name, create in later_stages:
            stage = create(tree)
            self.debug_msg('executing %s stage...' % stage_name)
            tree = stage.process()
            self._record_output(stage_name, tree)

        return tree
示例#5
0
    def process_sentence(self, sentence):
        """Build the tree for ``sentence`` and return it.

        The value returned by ``process_token`` for the sentence root is
        stored as the tree's ``root_id``; redundant nesting is then removed
        from the tree before it is returned.
        """
        root_id = self.process_token(sentence.root())
        tree = self.tree
        tree.root_id = root_id
        tree.remove_redundant_nesting()
        return tree


def transform(sentence):
    """Process ``sentence`` with a freshly created ``AlphaStage``."""
    return AlphaStage().process_sentence(sentence)


if __name__ == "__main__":
    # Manual smoke run: parse a sample text, then print every sentence,
    # its tree and the result of transform().
    test_text = """
    My name is James Bond.
    """

    print("Starting parser...")
    parser = Parser()

    print("Parsing...")
    result = parser.parse_text(test_text)
    print(result)

    for raw_parse in result:
        sentence = Sentence(raw_parse)
        print(sentence)
        sentence.print_tree()
        print(transform(sentence))
示例#6
0
def create_parser(lang='en'):
    """Return a new ``Parser`` built for ``lang`` (``'en'`` by default)."""
    parser = Parser(lang)
    return parser
示例#7
0
class Reader(object):
    """Run parsed sentences through a configurable sequence of named stages.

    The first configured stage consumes the sentence itself; every later
    stage is built from the previous stage's output. ``create_stage`` maps
    stage names to stage objects.
    """

    def __init__(self,
                 hg,
                 stages=('hypergen-forest', 'disamb-naive', 'merge', 'shallow',
                         'concepts'),
                 show_namespaces=False):
        """Store the configuration; the parser is created lazily.

        Args:
            hg: object handed to ``Disambiguation`` and to the 'disamb'
                stage.
            stages: ordered stage names understood by ``create_stage``.
            show_namespaces: forwarded as ``with_namespaces`` when a stage
                output is rendered as a string.
        """
        self.hg = hg
        self.stages = stages
        # Parser and disambiguation helper are created by read_text.
        self.parser = None
        self.disamb = None
        # When True, debug_msg prints in addition to logging.
        self.debug = False
        # Context text forwarded to the 'disamb' stage.
        self.aux_text = ''
        self.show_namespaces = show_namespaces

    def create_stage(self, name, output):
        """Build the stage object registered under ``name``.

        Args:
            name: one of 'hypergen-forest', 'hypergen-nn', 'disamb',
                'disamb-simple', 'disamb-naive', 'merge', 'shallow' or
                'concepts'.
            output: output of the previous stage; ignored by the two
                'hypergen-*' stages.

        Raises:
            RuntimeError: if ``name`` is not a known stage name.
        """
        if name == 'hypergen-forest':
            return Hypergen(model_type='rf')
        if name == 'hypergen-nn':
            return Hypergen(model_type='nn')
        if name == 'disamb':
            return Disamb(self.hg, self.parser, self.disamb, output,
                          self.aux_text)
        if name == 'disamb-simple':
            return DisambSimple(output)
        if name == 'disamb-naive':
            return DisambNaive(output)
        if name == 'merge':
            return Merge(output)
        if name == 'shallow':
            return Shallow(output)
        if name == 'concepts':
            return Concepts(output)
        raise RuntimeError('unknown stage name: %s' % name)

    def debug_msg(self, msg):
        """Log ``msg`` at INFO level and also print it when debugging."""
        logging.info(msg)
        if self.debug:
            print(msg)

    def read_text(self, text, aux_text=None, reset_context=True):
        """Parse ``text`` and run every parse through ``read_sentence``.

        The parser receives ``text`` with surrounding whitespace stripped;
        ``self.aux_text`` is built from the unstripped ``text``.

        Args:
            text: text to parse.
            aux_text: optional extra text; when truthy it is appended to
                ``text`` (newline-separated) in ``self.aux_text``.
            reset_context: when true, ``self.aux_text`` is overwritten;
                otherwise it is left untouched.

        Returns:
            A list of ``(first_item, sentence_output)`` tuples, one per
            element returned by the parser.
        """
        if self.parser is None:
            self.debug_msg('creating parser...')
            self.parser = Parser()
            self.disamb = Disambiguation(self.hg, self.parser)
        nlp_parses = self.parser.parse_text(text.strip())

        if reset_context:
            if aux_text:
                self.aux_text = '%s\n%s' % (text, aux_text)
            else:
                self.aux_text = text

        parses = [(p[0], self.read_sentence(Sentence(p[1])))
                  for p in nlp_parses]

        # Log the edges attached to every sentence output.
        for _, sentence_output in parses:
            self.debug_msg('== extra ==')
            for edg in sentence_output.edges:
                self.debug_msg(ed.edge2str(edg))

        return parses

    def read_sentence(self, sentence):
        """Run ``sentence`` through all configured stages, in order.

        The final output gets a ``main_edge`` attribute built from its
        tree; its ``sentence`` and ``tree`` attributes are then set to
        ``None``.

        Returns:
            The output object of the last stage.

        Raises:
            RuntimeError: if no stage is configured, or a stage name is
                unknown.
        """
        self.debug_msg('parsing sentence: %s' % sentence)
        if self.debug:
            sentence.print_tree()

        last_stage_output = None
        for position, name in enumerate(self.stages):
            stage = self.create_stage(name, last_stage_output)
            self.debug_msg('executing %s stage...' % name)
            if position == 0:
                # Only the first stage sees the sentence; later stages were
                # constructed from the previous output.
                last_stage_output = stage.process_sentence(sentence)
            else:
                last_stage_output = stage.process()
            output = last_stage_output.tree.to_hyperedge_str(
                with_namespaces=self.show_namespaces)
            self.debug_msg(output)

        if last_stage_output is None:
            # Without this guard an empty stage list fails below with an
            # opaque AttributeError on None.
            raise RuntimeError('cannot read sentence: no stages configured')

        last_stage_output.main_edge = last_stage_output.tree.to_hyperedge()

        # TODO: ugly...
        # NOTE(review): presumably drops these references so the returned
        # object stays light -- confirm with callers.
        last_stage_output.sentence = None
        last_stage_output.tree = None

        return last_stage_output
示例#8
0
        return elem_id, transf

    def process_sentence(self, sentence):
        """Build the tree for ``sentence`` and wrap it in a ``ParserOutput``.

        Element 0 of the value returned by ``process_token`` for the
        sentence root is stored as the tree's ``root_id``.
        """
        root_result = self.process_token(sentence.root())
        self.tree.root_id = root_result[0]
        parser_output = ParserOutput(sentence, self.tree)
        return parser_output


def transform(sentence):
    """Process ``sentence`` with a freshly created ``AlphaForest``."""
    return AlphaForest().process_sentence(sentence)


if __name__ == '__main__':
    # Manual smoke run: parse a sample text and print the hyperedge string
    # produced by transform() for every parse.

    # learn('cases.csv', 'alpha_forest.model')

    test_text = """
        Satellites from NASA and other agencies have been tracking sea ice changes since 1979.
        """
    # test_text = 'Telmo is going to the gym.'

    print('Starting parser...')
    parser = Parser()

    print('Parsing...')
    result = parser.parse_text(test_text)

    for parse in result:
        # Element 1 of each parse is what Sentence is built from.
        sentence = Sentence(parse[1])
        stage_output = transform(sentence)
        print(stage_output.tree.to_hyperedge_str(with_namespaces=False))