Пример #1
0
def edge2pattern(edge, root=False, subtype=False):
    """Build a pattern edge of the form '<root>/<type>[.<argroles>]'.

    The root of ``edge`` is kept only when ``root`` is true and the edge is
    an atom; otherwise the wildcard '*' is used. The full type is kept only
    when ``subtype`` is true; otherwise just its first character is used.
    The argroles suffix is appended only when the edge has argroles.
    """
    keep_root = root and edge.is_atom()
    root_part = edge.root() if keep_root else '*'
    type_part = edge.type() if subtype else edge.type()[0]
    base = '{}/{}'.format(root_part, type_part)
    roles = edge.argroles()
    if roles == '':
        return hedge(base)
    return hedge('{}.{}'.format(base, roles))
Пример #2
0
def normalize_edge(edge):
    """Recursively reorder arguments according to ``argrole_order``.

    Atoms are returned unchanged. For a non-atomic edge whose connector has
    argroles, the arguments are sorted by the rank of their role in
    ``argrole_order``, the connector's atom is rebuilt with the reordered
    role string, and finally every subedge is normalized in turn.
    """
    if edge.is_atom():
        return edge

    connector = edge[0]
    roles = connector.argroles()
    if roles != '':
        # Pair every role with its argument; the sort is stable, so
        # arguments sharing a role keep their relative order.
        pairs = list(zip(roles, edge[1:]))
        pairs.sort(key=lambda pair: argrole_order[pair[0]])
        roles = ''.join(role for role, _ in pairs)
        old_atom = connector.atom()
        new_atom = hedge(
            '{}/{}.{}'.format(old_atom.root(), old_atom.type(), roles))
        connector = connector.replace_atom(old_atom, new_atom)
        edge = hedge([connector] + [argument for _, argument in pairs])
    return hedge([normalize_edge(child) for child in edge])
Пример #3
0
 def parse_and_add(self, text, hg, sequence=None, set_text=True):
     """Parse ``text`` and add the resulting edges to the hypergraph ``hg``.

     For every parse with a non-empty coreference-resolved edge, that edge
     is added to ``hg``, its text is attached (unless ``set_text`` is
     false), a coreference-resolution edge is recorded when ``self.corefs``
     is set, and the parse's extra edges are added. Finally, all inferred
     edges are added with ``count=True``.

     Keyword arguments:
     sequence -- if truthy, main edges are added with
     ``hg.add_to_sequence(sequence, edge)`` instead of ``hg.add(edge)``
     (default: None)
     set_text -- if false, no text is attached to the added edges
     (default: True)

     Returns the dictionary produced by ``self.parse(text)``.
     """
     parse_results = self.parse(text)
     for parse in parse_results['parses']:
         main_edge = parse['resolved_corefs']
         unresolved_edge = parse['main_edge'] if self.corefs else None
         if not main_edge:
             continue
         # add main edge
         if sequence:
             hg.add_to_sequence(sequence, main_edge)
         else:
             hg.add(main_edge)
         # attach text to edge and subedges, unless the caller opted out
         if set_text:
             _set_edges_text(main_edge, hg, parse)
         if self.corefs:
             # NOTE(review): this repeats the call above with the same
             # edge; it may have been meant for unresolved_edge -- confirm
             # before changing.
             if set_text and unresolved_edge != main_edge:
                 _set_edges_text(main_edge, hg, parse)
             coref_res_edge = hedge(
                 (const.coref_res_pred, unresolved_edge, main_edge))
             hg.add(coref_res_edge)
         # add extra edges
         for edge in parse['extra_edges']:
             hg.add(edge)
     for edge in parse_results['inferred_edges']:
         hg.add(edge, count=True)
     return parse_results
Пример #4
0
 def test_add_count(self):
     """Each add with count=True increments the edge's 'count' attribute."""
     self.hg.destroy()
     counted = hedge('(is/Pd graphbrain/Cp great/C)')
     for expected in (1, 2):
         self.hg.add(counted, count=True)
         self.assertEqual(
             self.hg.get_int_attribute(counted, 'count'), expected)
Пример #5
0
def test_parser(args):
    """Compare parser output against the expected edges listed in a file.

    ``args.infile`` is read as alternating lines: a sentence, then the edge
    expected for it. For every mismatch the tree under the sentence root is
    printed with ``print_tree`` along with both edges; a summary line is
    printed at the end.
    """
    parser = create_parser(lang=args.lang, parser_class=args.parser)

    n_total = 0
    n_wrong = 0
    pending = None  # sentence still waiting for its expected-edge line

    with open(args.infile) as infile:
        for raw in infile:
            if not pending:
                pending = raw.strip()
                continue

            n_total += 1
            expected = hedge(raw.strip())
            first_parse = parser.parse(pending)['parses'][0]
            actual = first_parse['main_edge']
            spacy_sent = first_parse['spacy_sentence']
            if actual != expected:
                n_wrong += 1
                print_tree(spacy_sent.root)
                print('expected:')
                print(expected)
                print('result:')
                print(actual)
            pending = None

    print('%s wrong out of %s.' % (n_wrong, n_total))
Пример #6
0
def input_defects(sentence, edge):
    """Prompt the user until they name valid defective subedges of ``edge``.

    Answering 's', 'h' or 'i' prints the sentence, the colored edge or the
    indented edge respectively and asks again. Any other answer is split on
    '&' and each part is parsed as an edge; the answer is accepted only if
    every part parses and is contained (deep) in ``edge``.

    Returns the list of accepted subedges.
    """
    prompt = 'wrong subedge ({}) ? '.format(
        '{}/{}/{}/subedge'.format(colored('s', 'magenta'),
                                  colored('h', 'cyan'),
                                  colored('i', 'yellow')))

    defects = None
    while not defects:
        answer = input(prompt)
        if answer == 's':
            print('\n{}\n'.format(sentence))
            continue
        if answer == 'h':
            print('\n{}\n'.format(colored_edge(edge)))
            continue
        if answer == 'i':
            print('\n{}\n'.format(indented(edge)))
            continue

        candidates = []
        all_valid = True
        for text in answer.split('&'):
            candidate = hedge(text)
            if candidate is None:
                error_msg('{} did not parse correctly.'.format(text))
                all_valid = False
            elif edge.contains(candidate, deep=True):
                candidates.append(candidate)
            else:
                error_msg('{} is not a subedge of {}.'.format(
                    candidate.to_str(), edge.to_str()))
                all_valid = False
        if all_valid:
            defects = candidates
    return defects
Пример #7
0
    def process_edge(self, edge, depth):
        """Yield a conflict operation (plus topic items) for matching edges.

        An edge matches when its connector type starts with 'Pd', it has
        more than two elements, the deep lemma root of its predicate is in
        ``CONFLICT_PRED_LEMMAS``, and it has exactly one subject and one
        object whose stripped concepts are proper and whose main
        coreferences are both actors. ``depth`` is not used here.
        """
        hg = self.system.get_hg(self)

        if edge.is_atom():
            return
        if edge.connector_type()[:2] != 'Pd':
            return

        predicate = edge[0]
        if len(edge) <= 2:
            return
        if deep_lemma(hg, predicate).root() not in CONFLICT_PRED_LEMMAS:
            return

        subjects = edge.edges_with_argrole('s')
        objects = edge.edges_with_argrole('o')
        if len(subjects) != 1 or len(objects) != 1:
            return

        source = strip_concept(subjects[0])
        target = strip_concept(objects[0])
        if not (source and target and has_proper_concept(source)
                and has_proper_concept(target)):
            return

        actor_orig = main_coref(hg, source)
        actor_targ = main_coref(hg, target)
        conflict_edge = hedge(('conflict/P/.', actor_orig, actor_targ, edge))
        if not (is_actor(hg, actor_orig) and is_actor(hg, actor_targ)):
            return

        yield create_op(conflict_edge)
        for topic_item in self._topics(hg, actor_orig, actor_targ, edge):
            yield topic_item
        # Only reached if the consumer exhausts the items above.
        self.conflicts += 1
Пример #8
0
 def test_degrees(self):
     """Atom degrees follow the edges added to and removed from the graph."""
     hg = self.hg
     hg.destroy()
     graphbrain = hedge('graphbrain/1')
     great = hedge('great/1')
     is_edge = '(is graphbrain/1 great/1)'
     size_edge = '(size graphbrain/1 7)'

     self.assertEqual(hg.degree(graphbrain), 0)

     hg.add(is_edge)
     self.assertEqual(hg.degree(graphbrain), 1)
     self.assertEqual(hg.degree(great), 1)

     hg.add(size_edge)
     self.assertEqual(hg.degree(graphbrain), 2)
     self.assertEqual(hg.degree(great), 1)

     hg.remove(hedge(is_edge))
     self.assertEqual(hg.degree(graphbrain), 1)
     self.assertEqual(hg.degree(great), 0)

     hg.remove(hedge(size_edge))
     self.assertEqual(hg.degree(graphbrain), 0)
Пример #9
0
 def test_non_primary_to_primary(self):
     """Re-adding a non-primary edge with primary=True makes it primary."""
     hg = self.hg
     hg.destroy()
     sun_edge = hedge('(is/P (the/M sun/C) shining/C)')

     hg.add(sun_edge, primary=False)
     self.assertFalse(hg.is_primary(sun_edge))

     hg.add(sun_edge, primary=True)
     self.assertTrue(hg.is_primary(sun_edge))
Пример #10
0
def clean_edge(edge):
    """Return a cleaned atom; non-atomic edges are returned untouched.

    For an atom only the root is kept: underscores are removed, the result
    is passed through ``unidecode`` and rebuilt with ``hedge``.
    """
    if edge.is_atom():
        return hedge(unidecode(edge.root().replace('_', '')))
    return edge
Пример #11
0
    def test_counter4(self):
        """Check pattern counts when the counter uses match_roots={'./P'}."""
        pc = PatternCounter(match_roots={'./P'})
        pc.count(hedge('((not/M is/P.sc) mary/C (not/M nice/C))'))

        # (pattern, expected count)
        expectations = (
            ('(*/P.sc */C */C)', 0),
            ('((*/M */P.sc) */C */C)', 0),
            ('((*/M is/P.sc) */C */C)', 1),
            ('(*/P.sc */C (*/M */C))', 0),
            ('((*/M */P.sc) */C (*/M */C))', 0),
            ('(*/M */P.sc)', 0),
            ('(*/M is/P.sc)', 1),
            ('(*/M */C)', 1),
        )
        for pattern, expected in expectations:
            self.assertTrue(pc.patterns[hedge(pattern)] == expected)
Пример #12
0
def replace_subject(edge, new_subject):
    """Return a copy of ``edge`` with each 's'-role argument replaced.

    The role at index i of the connector's argroles refers to the element
    at index i + 1 of the edge (index 0 being the connector itself).
    """
    connector = edge[0]
    items = list(edge)
    subject_slots = [
        index + 1
        for index, role in enumerate(connector.argroles())
        if role == 's'
    ]
    for slot in subject_slots:
        items[slot] = new_subject
    return hedge(items)
Пример #13
0
 def count(self, edge):
     """Count the patterns of ``edge`` and, optionally, of its subedges.

     Atoms are ignored. Patterns are counted only if the edge passes
     ``self._matches_expansions``; subedges are visited recursively when
     ``self.count_subedges`` is set.
     """
     if edge.is_atom():
         return
     if self._matches_expansions(edge):
         for pattern in self._edge2patterns(edge):
             self.patterns[hedge(pattern)] += 1
     if self.count_subedges:
         for child in edge:
             self.count(child)
Пример #14
0
 def test_counters3_non_deep_removal(self):
     """Check the graph counters after a non-deep removal of an edge."""
     hg = self.hg
     hg.destroy()
     says_edge = '(says mary/C (is graphbrain/C great/C))'
     hg.add(says_edge)
     hg.remove(hedge(says_edge), deep=False)

     self.assertEqual(hg.atom_count(), 5)
     self.assertEqual(hg.primary_atom_count(), 0)
     self.assertEqual(hg.edge_count(), 6)
     self.assertEqual(hg.primary_edge_count(), 0)
Пример #15
0
def blocks(edge, subtypes=False, argroles=True, namespaces=False):
    """Display the simplified edge as HTML blocks inside a padded <div>.

    Keyword arguments:
    subtypes -- forwarded to ``edge.simplify`` (default: False)
    argroles -- forwarded to ``edge.simplify`` (default: True)
    namespaces -- forwarded to ``edge.simplify`` (default: False)
    """
    simplified = hedge(edge).simplify(subtypes=subtypes,
                                      argroles=argroles,
                                      namespaces=namespaces)
    inner = _edge2html_blocks(simplified)
    wrapper = '<div style="background-color:#fcfcfc; padding:50px">{}</div>'
    display(HTML(wrapper.format(inner)))
Пример #16
0
 def _edge2patterns(self, edge):
     """Return the patterns derived from ``edge`` as a list of edges."""
     force_subtypes = self._force_subtypes(edge)
     force_root, _ = self._force_root_expansion(edge)
     raw_patterns = self._list2patterns(list(edge.normalized()),
                                        force_subtypes=force_subtypes,
                                        force_root=force_root,
                                        force_expansion=False)
     return [hedge(raw) for raw in raw_patterns]
Пример #17
0
 def test_search(self):
     """Check which stored edges each search pattern returns."""
     hg = self.hg
     hg.destroy()
     is_great = '(is/Pd graphbrain/Cp great/C)'
     says_is_great = '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C))'
     stored = (
         is_great,
         '(says/Pd mary/Cp)',
         says_is_great,
         '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C) extra/C)',
     )
     for edge_str in stored:
         hg.add(edge_str)

     # (search pattern, edges expected back, in order)
     cases = (
         ('(* graphbrain/Cp *)', [is_great]),
         ('(is/Pd graphbrain/Cp *)', [is_great]),
         ('(x * *)', []),
         ('(says/Pd * (is/Pd graphbrain/Cp great/C))', [says_is_great]),
         ('(says/Pd * (is/Pd * *))', [says_is_great]),
     )
     for pattern, expected in cases:
         self.assertEqual(list(hg.search(pattern)),
                          [hedge(edge_str) for edge_str in expected])
Пример #18
0
 def test_inc_attributes_does_not_exist(self):
     """Incrementing a missing attribute yields 1, then 2."""
     hg = self.hg
     hg.destroy()
     edge = hedge('(is graphbrain/1 great/1)')
     hg.add(edge)
     self.assertEqual(hg.get_int_attribute(edge, 'foo'), None)
     for expected in (1, 2):
         hg.inc_attribute(edge, 'foo')
         self.assertEqual(hg.get_int_attribute(edge, 'foo'), expected)
Пример #19
0
 def test_batch_adds(self):
     """Edges added through hopen('test.db') must exist in self.hg."""
     self.hg.destroy()
     numbers = [hedge('(is/P {}/C number/C)'.format(n)) for n in range(10)]
     with hopen('test.db') as batch:
         for number_edge in numbers:
             batch.add(number_edge)
     for number_edge in numbers:
         self.assertTrue(self.hg.exists(number_edge))
Пример #20
0
    def test_counter1(self):
        """Check pattern counts produced by a default PatternCounter."""
        pc = PatternCounter()
        pc.count(hedge('((not/M is/P.sc) mary/C (not/M nice/C))'))

        # (assertion applied to "count == 1", pattern)
        checks = (
            (self.assertTrue, '(*/P.sc */C */C)'),
            (self.assertTrue, '((*/M */P.sc) */C */C)'),
            (self.assertTrue, '(*/P.sc */C (*/M */C))'),
            (self.assertTrue, '((*/M */P.sc) */C (*/M */C))'),
            (self.assertTrue, '(*/M */P.sc)'),
            (self.assertFalse, '(*/M is/P.sc)'),
            (self.assertTrue, '(*/M */C)'),
        )
        for check, pattern in checks:
            check(pc.patterns[hedge(pattern)] == 1)
Пример #21
0
def show(edge, style='indented'):
    """Display a representation of the edge in the notebook.

    Keyword arguments:
    style -- render style ('indented', 'line')
    (default: 'indented')
    """
    rendered = _edge2html_show(hedge(edge), style=style)
    # Only the first element of the result is the HTML to display.
    display(HTML(rendered[0]))
Пример #22
0
 def test_star_limit(self):
     """star() returns at most ``limit`` edges, and all edges without it."""
     hg = self.hg
     hg.destroy()
     for edge_str in ('(is graphbrain/1 great/1)',
                      '(is graphbrain/1 great/2)',
                      '(is graphbrain/1 great/3)'):
         hg.add(edge_str)
     center = hedge('graphbrain/1')

     self.assertEqual(len(list(hg.star(center))), 3)
     for limit, expected in ((1, 1), (2, 2), (10, 3)):
         self.assertEqual(len(list(hg.star(center, limit=limit))), expected)
Пример #23
0
def coref_set(hg, edge, corefs=None):
    """Return the set of coreferences that the given edge belongs to.

    ``corefs`` is the accumulator shared by the recursive calls; callers
    normally leave it as None, in which case it starts as ``{edge}``.
    """
    if corefs is None:
        corefs = {edge}
    for relation in hg.edges_with_edges((hedge(coref_pred), edge)):
        # Only three-element edges headed by the coref predicate count.
        if len(relation) != 3 or relation[0].to_str() != coref_pred:
            continue
        for member in relation[1:]:
            if member in corefs:
                continue
            corefs.add(member)
            coref_set(hg, member, corefs)
    return corefs
Пример #24
0
 def test_deep_degrees(self):
     """Check degree() and deep_degree() as two nested edges are added."""
     hg = self.hg
     hg.destroy()
     mary_edge = hedge('((is/M going/P) mary/C (to (the/M gym/C)))')
     hg.add(mary_edge)
     mary = hedge('mary/C')
     gym = hedge('gym/C')
     is_going = hedge('(is/M going/P)')

     self.assertEqual(hg.deep_degree(mary_edge), 0)
     # (edge, expected degree, expected deep degree)
     for target, degree, deep in ((mary, 1, 1), (gym, 0, 1),
                                  (is_going, 1, 1)):
         self.assertEqual(hg.degree(target), degree)
         self.assertEqual(hg.deep_degree(target), deep)
     self.assertEqual(hg.deep_degree(gym), 1)

     john_edge = hedge('((is/M going/P) john/C (to (the/M gym/C)))')
     hg.add(john_edge)
     for target, degree, deep in ((gym, 0, 2), (is_going, 2, 2)):
         self.assertEqual(hg.degree(target), degree)
         self.assertEqual(hg.deep_degree(target), deep)
Пример #25
0
 def test_add_with_attributes2(self):
     """An edge added with attribute 'p' set to 0 is not primary."""
     self.hg.destroy()
     edge = hedge('(is graphbrain/1 great/1)')
     attributes = {
         'p': 0,
         'd': 10,
         'dd': 20,
         'foo': 777,
         'bar': -.77,
         'xx': 'def',
     }
     self.hg.add_with_attributes(edge, attributes)
     self.assertFalse(self.hg.is_primary(edge))
Пример #26
0
 def test_search_star(self):
     """Searching for '*' returns every stored edge and every atom."""
     hg = self.hg
     hg.destroy()
     stored = (
         '(is/Pd graphbrain/Cp great/C)',
         '(says/Pd mary/Cp)',
         '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C))',
         '(says/Pd mary/Cp (is/Pd graphbrain/Cp great/C) extra/C)',
     )
     for edge_str in stored:
         hg.add(edge_str)

     atoms = (
         'extra/C',
         'graphbrain/Cp',
         'great/C',
         'is/Pd',
         'mary/Cp',
         'says/Pd',
     )
     self.assertEqual(
         set(hg.search('*')),
         {hedge(edge_str) for edge_str in stored + atoms})
Пример #27
0
    def test_counter6(self):
        """Check counts with count_subedges=False and match_subtypes."""
        pc = PatternCounter(count_subedges=False, match_subtypes={'*/M'})
        pc.count(hedge('((not/Mn is/Pd.sc) mary/Cp.s (very/M nice/Cc.s))'))

        # (assertion applied to "count == 1", pattern)
        checks = (
            (self.assertTrue, '(*/P.sc */C */C)'),
            (self.assertTrue, '((*/Mn */P.sc) */C */C)'),
            (self.assertTrue, '(*/P.sc */C (*/M */C))'),
            (self.assertTrue, '((*/Mn */P.sc) */C (*/M */C))'),
            (self.assertFalse, '(*/Mn */P.sc)'),
            (self.assertFalse, '(*/M */C)'),
        )
        for check, pattern in checks:
            check(pc.patterns[hedge(pattern)] == 1)
Пример #28
0
 def test_add_with_attributes_search(self):
     """An edge added with attributes is found by a matching search."""
     self.hg.destroy()
     edge = hedge('(is graphbrain/1 great/1)')
     attributes = {
         'p': 1,
         'd': 10,
         'dd': 20,
         'foo': 777,
         'bar': -.77,
         'xx': 'def',
     }
     self.hg.add_with_attributes(edge, attributes)
     found = set(self.hg.search('(is * *)'))
     self.assertEqual(found, {edge})
Пример #29
0
def are_corefs(hg, edge1, edge2, corefs=None):
    """Check whether the two given edges are coreferences.

    ``corefs`` holds the edges already visited by the recursive search;
    callers normally leave it as None, in which case it starts as
    ``{edge1}``.
    """
    if corefs is None:
        corefs = {edge1}
    for relation in hg.edges_with_edges((hedge(coref_pred), edge1)):
        # Only three-element edges headed by the coref predicate count.
        if len(relation) != 3 or relation[0].to_str() != coref_pred:
            continue
        for member in relation[1:]:
            if member in corefs:
                continue
            if member == edge2:
                return True
            corefs.add(member)
            if are_corefs(hg, member, edge2, corefs):
                return True
    return False
Пример #30
0
 def test_primary_2(self):
     """set_primary() promotes an edge, a subedge and an atom one by one."""
     hg = self.hg
     hg.destroy()
     edge1 = hedge('((is/M going/P) mary/C (to (the/M gym/C)))')
     hg.add(edge1, primary=False)

     targets = (edge1, hedge('(is/M going/P)'), hedge('mary/C'))
     for target in targets:
         self.assertFalse(hg.is_primary(target))
         hg.set_primary(target, True)
         self.assertTrue(hg.is_primary(target))