def replace_markerless(self, stack, node, depth):
     """Assign a unique index to all of the MARKERLESS paragraphs"""
     if node.label[-1] == mtypes.MARKERLESS:
         keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
         if keyterm:
             p_num = keyterm_to_int(keyterm)
         else:
             # len(n.label[-1]) < 6 filters out keyterm nodes
             p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                         for n in stack.peek_level(depth)) + 1
         node.label[-1] = 'p{}'.format(p_num)
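
The len(n.label[-1]) < 6 filter above leans on the behaviour of keyterm_to_int: per the test further down, keyterm-derived indices always exceed 10000, so their labels run to at least six characters ('p10001' and longer), while positional counters stay short. A small illustration of that assumption (the label values are made up):

# Hypothetical label strings, for illustration only.
positional_label = 'p3'        # counter-style markerless index
keyterm_label = 'p48231'       # keyterm_to_int(...) results exceed 10000
assert len(positional_label) < 6   # counted by replace_markerless
assert len(keyterm_label) >= 6     # skipped as a keyterm-derived label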
Example #2
def tokenize_override_ps(match):
    """Convert an explicit [label:...] override into a Paragraph token."""
    # Assumed reconstruction: the opening lines of this function are not
    # shown, but the code below implies parallel lists built from the match,
    # with the part and the section/appendix filling the first two slots.
    match_list = list(match)
    par_list = [None] * len(match_list)
    par_list[0] = match.part

    if match.section:
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix

    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p

    par = tokens.Paragraph(par_list)
    return [par]


_keyterm_label_part = (
    Suppress(Marker("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{}".format(keyterm_to_int(m[0])))
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase +
                          string.digits)
_label_part = _keyterm_label_part | _simple_label_part

override_label = (
    Suppress("[") +
    Marker("label") + Suppress(":") +
    atomic.part +
    Suppress("-") +
    (atomic.section | atomic.appendix) +
    ZeroOrMore(Suppress("-") + _label_part) +
    Suppress("]")
).setParseAction(tokenize_override_ps)
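
A hedged usage sketch of the grammar above: it assumes atomic.part and atomic.section match the usual numeric part and section tokens, and that Marker suppresses the literal "label", leaving only the label pieces for tokenize_override_ps. The input string is made up for illustration.

raw = "[label:1005-12-a-keyterm(Definitions)]"
(paragraph,) = override_label.parseString(raw)
# paragraph should be a tokens.Paragraph whose final label part is
# "p{}".format(keyterm_to_int("Definitions")), produced by _keyterm_label_part.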

def test_keyterm_to_int(self):
    """keyterm_to_int should standardize the keyterm"""
    self.assertEqual(keyterm_to_int('Abc 123 More.'),
                     keyterm_to_int(' abc123 mOrE'))
    self.assertTrue(keyterm_to_int('a term') > 10000)
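
The test pins down keyterm_to_int's contract without showing its body: case, spacing, and punctuation must not change the result, and the integer must stay well above ordinary paragraph counters. A minimal sketch that would satisfy these assertions (keyterm_to_int_sketch is a hypothetical stand-in, not the parser's actual implementation):

import hashlib
import re


def keyterm_to_int_sketch(keyterm):
    """Normalize a keyterm, then hash it into a stable integer above 10000."""
    normalized = re.sub(r'[^a-z0-9]', '', keyterm.lower())
    digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
    # Truncate the digest so the index stays readable, then offset it so it
    # can never collide with small positional paragraph counters.
    return int(digest[:8], 16) % 10 ** 7 + 10 ** 5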