def test_hash_for_paragraph(self):
    """hash_for_paragraph should standardize the given parameter. It should
    also use numbers in a large range -- an arbitrary hash should result in
    a relatively large number"""
    self.assertEqual(hash_for_paragraph('Abc 123 More.'),
                     hash_for_paragraph(' abc123 mOrE'))
    random_term = uuid.uuid4().hex
    self.assertTrue(hash_for_paragraph(random_term) > 10000,
                    msg="Hashed too small: {0}".format(random_term))
def test_hash_for_paragraph(self):
    """hash_for_paragraph should standardize the given parameter. It should
    also use numbers in a large range -- an arbitrary hash should result in
    a relatively large number"""
    self.assertEqual(hash_for_paragraph('Abc 123 More.'),
                     hash_for_paragraph(' abc123 mOrE'))
    random_term = uuid.uuid4().hex
    self.assertTrue(hash_for_paragraph(random_term) > 10000,
                    msg="Hashed too small: {}".format(random_term))
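The helper itself is not shown in these excerpts. As a rough illustration, a normalize-then-hash function along the following lines would satisfy both assertions above; the function name, the alphanumeric-only normalization, and the SHA-256 truncation are assumptions for the sketch, not the library's actual implementation.

import hashlib
import re


def hash_for_paragraph_sketch(text):
    # Hypothetical stand-in: lower-case the text and drop everything except
    # letters and digits, so 'Abc 123 More.' and ' abc123 mOrE' both
    # normalize to 'abc123more', then hash into a large integer range.
    normalized = re.sub(r'[^a-z0-9]', '', text.lower())
    digest = hashlib.sha256(normalized.encode('utf-8')).hexdigest()
    return int(digest[:8], 16)  # roughly 0 .. 4.3 billion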
def replace_markerless(stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.keyterm_in_node(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
            # Sometimes key terms will be repeated and the hash will be
            # identical. This is here to catch that case.
            if 'p{0}'.format(p_num) in [item[1].label[0]
                                        for item in stack.m_stack[-1]]:
                p_num = hash_for_paragraph(keyterm + "dedupe")
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{0}'.format(p_num)
def test_parse_amdpar_definition():
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            "term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    with assert_instruction_conversion(text, []) as expected_ctx:
        expected_ctx.POST(label='478-?-11-p{0}'.format(
            hash_for_paragraph("Nonimmigrant visa")))
def test_parse_amdpar_definition():
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            "term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    with assert_instruction_conversion(text, []) as expected_ctx:
        expected_ctx.POST(label='478-?-11-p{0}'.format(hash_for_paragraph(
            "Nonimmigrant visa")))
def marker(cls, header_text):
    """Derive a unique, repeatable identifier for this subtree. This allows
    the same category to be reordered (e.g. if a note has been added), or a
    header with multiple reserved categories to be split (which would also
    re-order the categories that followed)"""
    match = cls.CATEGORY_RE.match(header_text)
    if match:
        return 'p{0}'.format(hash_for_paragraph(match.group('category')))
    else:
        logging.warning("Couldn't derive category: %s", header_text)
        return mtypes.MARKERLESS
def replace_markerless(self, stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.keyterm_in_node(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{}'.format(p_num)
def marker(cls, header_text):
    """Derive a unique, repeatable identifier for this subtree. This allows
    the same category to be reordered (e.g. if a note has been added), or a
    header with multiple reserved categories to be split (which would also
    re-order the categories that followed)"""
    match = cls.CATEGORY_RE.match(header_text)
    if match:
        return 'p{}'.format(hash_for_paragraph(match.group('category')))
    else:
        logging.warning("Couldn't derive category: %s", header_text)
        return mtypes.MARKERLESS
def replace_markerless(self, stack, node, depth):
    """Assign a unique index to all of the MARKERLESS paragraphs"""
    if node.label[-1] == mtypes.MARKERLESS:
        keyterm = KeyTerms.get_keyterm(node, ignore_definitions=False)
        if keyterm:
            p_num = hash_for_paragraph(keyterm)
        else:
            # len(n.label[-1]) < 6 filters out keyterm nodes
            p_num = sum(n.is_markerless() and len(n.label[-1]) < 6
                        for n in stack.peek_level(depth)) + 1
        node.label[-1] = 'p{}'.format(p_num)
def test_parse_amdpar_definition(self):
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            u"term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    xml = etree.fromstring('<AMDPAR>%s</AMDPAR>' % text)
    instructions, _ = amdparser.parse_amdpar(xml, [])
    with XMLBuilder('EREGS_INSTRUCTIONS') as ctx:
        ctx.POST(label='478-?-11-p{}'.format(
            hash_for_paragraph("Nonimmigrant visa")))
    self.assertEqual(etree.tostring(instructions), ctx.xml_str)
def test_parse_amdpar_definition(self):
    """We should correctly deduce which paragraphs are being updated, even
    when they are identified by definition alone"""
    text = ("Section 478.11 is amended by adding a definition for the "
            u"term “Nonimmigrant visa” in alphabetical order to read as "
            "follows:")
    xml = etree.fromstring('<AMDPAR>%s</AMDPAR>' % text)
    instructions, _ = amdparser.parse_amdpar(xml, [])
    with XMLBuilder('EREGS_INSTRUCTIONS') as ctx:
        ctx.POST(label='478-?-11-p{}'.format(hash_for_paragraph(
            "Nonimmigrant visa")))
    self.assertEqual(etree.tostring(instructions), ctx.xml_str)
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix
    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p
    par = tokens.Paragraph(par_list)
    return [par]


_keyterm_label_part = (
    Suppress(Marker("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{}".format(hash_for_paragraph(m[0])))
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase +
                          string.digits)
_label_part = _keyterm_label_part | _simple_label_part

override_label = (
    Suppress("[") + Marker("label") + Suppress(":") +
    atomic.part + Suppress("-") +
    (atomic.section | atomic.appendix) +
    ZeroOrMore(Suppress("-") + _label_part) +
    Suppress("]")
).setParseAction(tokenize_override_ps)

# Looks like: [subject-group(Some text Goes Here)]
        par_list[1] = match.section
    elif match.appendix:
        par_list[1] = "Appendix:" + match.appendix
    # Set paragraph depths
    for p in match_list[2:]:
        par_list[match_list.index(p)] = p
    par = tokens.Paragraph.make(par_list)
    return [par]


_keyterm_label_part = (
    Suppress(Marker("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda m: "p{0}".format(hash_for_paragraph(m[0])))
_simple_label_part = Word(string.ascii_lowercase + string.ascii_uppercase +
                          string.digits)
_label_part = _keyterm_label_part | _simple_label_part

override_label = (
    Suppress("[") + Marker("label") + Suppress(":") +
    atomic.part + Suppress("-") +
    (atomic.section | atomic.appendix) +
    ZeroOrMore(Suppress("-") + _label_part) +
    Suppress("]")
).setParseAction(tokenize_override_ps)

# Looks like: [subject-group(Some text Goes Here)]
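To illustrate how the keyterm rule rewrites its match into a single "p<hash>" label segment, here is a small self-contained pyparsing sketch. Marker and hash_for_paragraph belong to the surrounding codebase, so the example substitutes CaselessLiteral and a truncated SHA-1 helper; those substitutions are assumptions, not the project's own helpers.

import hashlib
from pyparsing import CaselessLiteral, QuotedString, Suppress


def _fake_hash(text):
    # Stand-in for hash_for_paragraph (assumption, not the real helper).
    return int(hashlib.sha1(text.encode('utf-8')).hexdigest()[:8], 16)


keyterm_part = (
    Suppress(CaselessLiteral("keyterm")) +
    QuotedString(quoteChar='(', endQuoteChar=')')
).setParseAction(lambda toks: "p{0}".format(_fake_hash(toks[0])))

# Parsing "keyterm(Nonimmigrant visa)" yields one stable 'p<hash>' token,
# which is how a definition's key term becomes a repeatable paragraph label.
print(keyterm_part.parseString("keyterm(Nonimmigrant visa)"))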