def test_hash_for_paragraph(self):
    """hash_for_paragraph should standardize the given parameter. It should
    also use numbers in a large range -- an arbitrary hash should result in
    a relatively large number"""
    self.assertEqual(hash_for_paragraph('Abc 123 More.'),
                     hash_for_paragraph(' abc123 mOrE'))
    random_term = uuid.uuid4().hex
    self.assertTrue(hash_for_paragraph(random_term) > 10000,
                    msg="Hashed too small: {0}".format(random_term))
def test_hash_for_paragraph(self):
    """hash_for_paragraph should standardize the given parameter. It should
    also use numbers in a large range -- an arbitrary hash should result in
    a relatively large number"""
    self.assertEqual(hash_for_paragraph('Abc 123 More.'),
                     hash_for_paragraph(' abc123 mOrE'))
    random_term = uuid.uuid4().hex
    self.assertTrue(hash_for_paragraph(random_term) > 10000,
                    msg="Hashed too small: {}".format(random_term))
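The helper itself is not shown in these excerpts. As a rough illustration, a normalize-then-hash function along the following lines would satisfy both assertions above; the function name, the alphanumeric-only normalization, and the SHA-256 truncation are assumptions for the sketch, not the library's actual implementation.

import hashlib
import re


def hash_for_paragraph_sketch(text):
    # Hypothetical stand-in: lower-case the text and drop everything except
    # letters and digits, so 'Abc 123 More.' and ' abc123 mOrE' both
    # normalize to 'abc123more', then hash into a large integer range.
    normalized = re.sub(r'[^a-z0-9]', '', text.lower())
    digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
    return int(digest[:8], 16)  # roughly 0 .. 4.3 billion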
def replace_markerless(stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.keyterm_in_node(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
            # Sometimes key terms will be repeated and the hash will be
            # identical. This is here to catch that case.
            if 'p{0}'.format(p_num) in [item[1].label[0]
                                        for item in stack.m_stack[-1]]:
                p_num = hash_for_paragraph(keyterm + "dedupe")
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{0}'.format(p_num)
def test_parse_amdpar_definition():
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            "term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    with assert_instruction_conversion(text, []) as expected_ctx:
        expected_ctx.POST(label='478-?-11-p{0}'.format(
            hash_for_paragraph("Nonimmigrant visa")))
def test_parse_amdpar_definition():
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            "term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    with assert_instruction_conversion(text, []) as expected_ctx:
        expected_ctx.POST(label='478-?-11-p{0}'.format(hash_for_paragraph(
            "Nonimmigrant visa")))
def marker(cls, header_text):
    """Derive a unique, repeatable identifier for this subtree. This allows
    the same category to be reordered (e.g. if a note has been added), or a
    header with multiple reserved categories to be split (which would also
    re-order the categories that followed)"""
    match = cls.CATEGORY_RE.match(header_text)
    if match:
        return 'p{0}'.format(hash_for_paragraph(match.group('category')))
    else:
        logging.warning("Couldn't derive category: %s", header_text)
        return mtypes.MARKERLESS
def replace_markerless(self, stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.keyterm_in_node(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{}'.format(p_num)
def marker(cls, header_text):
    """Derive a unique, repeatable identifier for this subtree. This allows
    the same category to be reordered (e.g. if a note has been added), or a
    header with multiple reserved categories to be split (which would also
    re-order the categories that followed)"""
    match = cls.CATEGORY_RE.match(header_text)
    if match:
        return 'p{}'.format(hash_for_paragraph(match.group('category')))
    else:
        logging.warning("Couldn't derive category: %s", header_text)
        return mtypes.MARKERLESS
def replace_markerless(self, stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{}'.format(p_num)
def test_parse_amdpar_definition(self):
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            u"term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    xml = etree.fromstring('<AMDPAR>%s</AMDPAR>' % text)
    instructions, _ = amdparser.parse_amdpar(xml, [])
    with XMLBuilder('EREGS_INSTRUCTIONS') as ctx:
        ctx.POST(label='478-?-11-p{}'.format(
            hash_for_paragraph("Nonimmigrant visa")))
    self.assertEqual(etree.tostring(instructions), ctx.xml_str)
def test_parse_amdpar_definition(self):
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            u"term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    xml = etree.fromstring('<AMDPAR>%s</AMDPAR>' % text)
    instructions, _ = amdparser.parse_amdpar(xml, [])
    with XMLBuilder('EREGS_INSTRUCTIONS') as ctx:
        ctx.POST(label='478-?-11-p{}'.format(hash_for_paragraph(
            "Nonimmigrant visa")))
    self.assertEqual(etree.tostring(instructions), ctx.xml_str)
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix
    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p
    par = tokens.Paragraph(par_list)
    return [par]


_keyterm_label_part = (
    Suppress(Marker("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{}".format(hash_for_paragraph(m[0])))
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase +
                          string.digits)
_label_part = _keyterm_label_part | _simple_label_part

override_label = (
    Suppress("[") + Marker("label") + Suppress(":") +
    atomic.part + Suppress("-") +
    (atomic.section | atomic.appendix) +
    ZeroOrMore(Suppress("-") + _label_part) +
    Suppress("]")
).setParseAction(tokenize_override_ps)

# Looks like: [subject-group(Some text Goes Here)]
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix
    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p
    par = tokens.Paragraph.make(par_list)
    return [par]


_keyterm_label_part = (
    Suppress(Marker("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{0}".format(hash_for_paragraph(m[0])))
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase +
                          string.digits)
_label_part = _keyterm_label_part | _simple_label_part

override_label = (
    Suppress("[") + Marker("label") + Suppress(":") +
    atomic.part + Suppress("-") +
    (atomic.section | atomic.appendix) +
    ZeroOrMore(Suppress("-") + _label_part) +
    Suppress("]")
).setParseAction(tokenize_override_ps)

# Looks like: [subject-group(Some text Goes Here)]
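To illustrate how the keyterm rule rewrites its match into a single "p<hash>" label segment, here is a small self-contained pyparsing sketch. Marker and hash_for_paragraph belong to the surrounding codebase, so the example substitutes CaselessLiteral and a truncated SHA-1 helper; those substitutions are assumptions, not the project's own helpers.

import hashlib
from pyparsing import CaselessLiteral, QuotedString, Suppress


def _fake_hash(text):
    # Stand-in for hash_for_paragraph (assumption, not the real helper).
    return int(hashlib.sha1(text.encode('utf-8')).hexdigest()[:8], 16)


keyterm_part = (
    Suppress(CaselessLiteral("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda toks: "p{0}".format(_fake_hash(toks[0])))

# Parsing "keyterm(Nonimmigrant visa)" yields one stable 'p<hash>' token,
# which is how a definition's key term becomes a repeatable paragraph label.
print(keyterm_part.parseString("keyterm(Nonimmigrant visa)"))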