Python marker_of示例，regparser.layer.paragraph_markers.marker_of Python示例

示例#1

0

显示文件

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Return the key term found in a node's tagged text, if any.

        The marker reported by ``marker_of`` is removed from
        ``node.tagged_text`` (first occurrence only); the remainder is
        stripped and handed to ``keyterm_in_text``.

        :param node: object exposing ``tagged_text``.
        :param ignore_definitions: when true, a key term for which
            ``cls.is_definition(node, keyterm)`` is truthy is not returned.
        :returns: the key term, or ``None`` when nothing was found or the
            match was skipped as a definition.
        """
        tagged_text = node.tagged_text
        without_marker = tagged_text.replace(marker_of(node), '', 1)
        found = keyterm_in_text(without_marker.strip())

        if not found:
            return None
        if ignore_definitions and cls.is_definition(node, found):
            return None
        return found

示例#2

0

显示文件

    def process_node_text(node):
        """Return the node's tagged text without its paragraph marker.

        Only the first occurrence of the marker (as reported by
        ``marker_of``) is removed; surrounding whitespace is then stripped.
        """
        leading_marker = marker_of(node)
        without_marker = node.tagged_text.replace(leading_marker, '', 1)
        return without_marker.strip()

示例#3

0

显示文件

def create_add_amendment(amendment):
    """ An amendment comes in with a whole tree structure. We break apart the
    tree here (this is what flatten does), convert the Node objects to JSON
    representations. This ensures that each amendment only acts on one node.
    In addition, this futzes with the change's field when stars are present.

    :param amendment: mapping whose ``'node'`` entry is the root of the tree
        being added.
    :returns: list of change dicts (one per flattened node, as produced by
        ``format_node``), each keyed by a label. Changes whose action is
        ``'PUT'`` may have their ``'action'`` or ``'field'`` adjusted.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    # dict views cannot be indexed on Python 3 (``c.values()[0]`` raises
    # TypeError), so inspect the values with any() instead.
    puts = [c for c in changes
            if any(v['action'] == 'PUT' for v in c.values())]
    for change in puts:
        # Each change dict is expected to hold exactly one key: the label
        label = next(iter(change))
        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        # Drop the leading marker by length; presumably the stripped text
        # starts with the marker -- verify against marker_of
        text = text[len(marker):].strip()
        # Text is stars, but this is not the root. Explicitly try to keep
        # this node
        if text == '* * *':
            change[label]['action'] = Verb.KEEP

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

示例#4

0

显示文件

文件： changes.py 项目： jmcarp/regulations-parser

def create_add_amendment(amendment):
    """ An amendment comes in with a whole tree structure. We break apart the
    tree here (this is what flatten does), convert the Node objects to JSON
    representations. This ensures that each amendment only acts on one node.
    In addition, this futzes with the change's field when stars are present.

    :param amendment: mapping whose ``'node'`` entry is the root of the tree
        being added.
    :returns: list of change dicts (one per flattened node, as produced by
        ``format_node``), each keyed by a label. Changes whose action is
        ``'PUT'`` may have their ``'action'`` or ``'field'`` adjusted.
    """

    nodes_list = []
    flatten_tree(nodes_list, amendment['node'])
    changes = [format_node(n, amendment) for n in nodes_list]

    # dict views cannot be indexed on Python 3 (``c.values()[0]`` raises
    # TypeError), so inspect the values with any() instead.
    puts = [c for c in changes
            if any(v['action'] == 'PUT' for v in c.values())]
    for change in puts:
        # Each change dict is expected to hold exactly one key: the label
        label = next(iter(change))
        node = struct.find(amendment['node'], label)
        text = node.text.strip()
        marker = marker_of(node)
        # Drop the leading marker by length; presumably the stripped text
        # starts with the marker -- verify against marker_of
        text = text[len(marker):].strip()
        # Text is stars, but this is not the root. Explicitly try to keep
        # this node
        if text == '* * *':
            change[label]['action'] = 'KEEP'

        # If text ends with a colon and is followed by stars, assume we are
        # only modifying the intro text
        if (text[-1:] == ':' and node.label == amendment['node'].label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

示例#5

0

显示文件

文件： changes.py 项目： jposi/regulations-parser

def create_add_amendment(amendment):
    """Flatten an amendment's node tree into one formatted entry per node.

    Every node under ``amendment['node']`` is collected by ``flatten_tree``
    and converted with ``format_node``. The entry for the root node (looked
    up by ``label_id()``) may then have its ``'field'`` adjusted when the
    root's text signals, via stars, that only part of it is being changed.

    :param amendment: mapping whose ``'node'`` entry is the root node.
    :returns: the list of formatted entries.
    """
    root = amendment['node']

    flattened = []
    flatten_tree(flattened, root)
    nodes = [format_node(item, amendment) for item in flattened]

    label = root.label_id()
    matching = [entry for entry in nodes if label in entry]
    root_change = matching[0][label]

    stripped = root.text.strip()
    # Drop the leading marker by length, then any whitespace after it
    body = stripped[len(marker_of(root)):].strip()

    if body == '* * *':
        # Only stars, yet marked as edited: treat it as a change to the
        # child paragraphs only
        root_change['field'] = '[children]'

    # An intro ending with a colon and followed by a STARS element is
    # treated as a change to the intro text only
    if body[-1:] == ':' and root.source_xml is not None:
        sibling = root.source_xml.getnext()
        if sibling is not None and sibling.tag == 'STARS':
            root_change['field'] = '[text]'

    return nodes

示例#6

0

显示文件

文件： key_terms.py 项目： eregs/regulations-parser

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Look for a key term in ``node.tagged_text``.

        The first occurrence of the node's marker (from ``marker_of``) is
        cut out and the result stripped before ``keyterm_in_text`` runs.

        :param node: object exposing ``tagged_text``.
        :param ignore_definitions: if true, skip key terms for which
            ``cls.is_definition(node, keyterm)`` is truthy.
        :returns: the key term, or ``None`` if there is none to report.
        """
        source_text = node.tagged_text
        candidate_text = source_text.replace(marker_of(node), '', 1).strip()
        keyterm = keyterm_in_text(candidate_text)

        if not keyterm:
            return None

        skip_as_definition = (ignore_definitions and
                              cls.is_definition(node, keyterm))
        return None if skip_as_definition else keyterm

示例#7

0

显示文件

文件： key_terms.py 项目： anthonygarvan/regulations-parser

    def keyterm_in_node(cls, node, ignore_definitions=True):
        """Look for a key term in a node's tagged text.

        A missing or falsy ``tagged_text`` attribute is treated as an empty
        string. The first occurrence of the marker from ``marker_of`` is
        removed and the result stripped before ``keyterm_in_text`` runs.

        :param node: object that may expose ``tagged_text``.
        :param ignore_definitions: if true, skip key terms for which
            ``cls.is_definition(node, keyterm)`` is truthy.
        :returns: the key term, or ``None`` if there is none to report.
        """
        raw = getattr(node, 'tagged_text', None)
        if not raw:
            raw = ''
        without_marker = raw.replace(marker_of(node), '', 1)
        found = keyterm_in_text(without_marker.strip())

        if not found:
            return None
        if ignore_definitions and cls.is_definition(node, found):
            return None
        return found

示例#8

0

显示文件

 def test_marker_of_range(self):
     """marker_of should return the whole marker when a paragraph opens
     with a range of markers (seen with "Reserved" paragraphs), and only
     the single marker when it is followed by a negative number."""
     cases = [
         ('(b) - (d)', '(b) - (d) Reserved'),
         ('(b)-(d)', '(b)-(d) Some Words'),
         ('b. - d.', 'b. - d. Can be ignored'),
         ('b.-d.', 'b.-d. Has no negative numbers'),
         ('(b)', '(b) -1.0 is negative'),
     ]
     for expected, text in cases:
         node = Node(text=text, label=['b'])
         self.assertEqual(expected, marker_of(node))

示例#9

0

显示文件

文件： layer_paragraph_markers_tests.py 项目： eregs/regulations-parser

 def test_marker_of_range(self):
     """Paragraphs may begin with a range of markers rather than one
     (seen with "Reserved" paragraphs); marker_of should return the full
     range, but must not mistake a negative number for a range."""
     expected_by_text = (
         ('(b) - (d) Reserved', '(b) - (d)'),
         ('(b)-(d) Some Words', '(b)-(d)'),
         ('b. - d. Can be ignored', 'b. - d.'),
         ('b.-d. Has no negative numbers', 'b.-d.'),
         ('(b) -1.0 is negative', '(b)'),
     )
     for text, marker in expected_by_text:
         found = marker_of(Node(text=text, label=['b']))
         self.assertEqual(marker, found)

示例#10

0

显示文件

文件： compiler.py 项目： kaitlin/regulations-parser

def overwrite_marker(origin, new_label):
    """Rewrite the marker in ``origin.text`` so it shows ``new_label``.

    The existing marker is found with ``marker_of``. A marker containing a
    parenthesis is replaced by ``(new_label)``; any other non-empty marker
    by ``new_label.``. Only the first occurrence in the text is replaced.
    When no marker is found, a warning is logged and the text is left
    unchanged.

    :returns: ``origin``, modified in place.
    """
    marker = marker_of(origin)

    if '(' in marker:
        template = '({0})'
    elif marker:
        template = '{0}.'
    else:
        logger.warning("Cannot replace marker in %s", origin.text)
        return origin

    replacement = template.format(new_label)
    origin.text = origin.text.replace(marker, replacement, 1)
    return origin

示例#11

0

显示文件

def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of per-node changes.

    ``flatten_tree`` collects every node under ``amendment['node']`` and
    ``format_node`` converts each one; only the root node is given the
    amendment's ``'parent_label'`` (if present). Changes whose action is
    ``'PUT'`` are then adjusted when the node's text contains stars.

    :param amendment: mapping with a ``'node'`` entry (the root node) and
        optionally a ``'parent_label'`` entry.
    :returns: the list of change dicts, one per flattened node.
    """
    root = amendment['node']

    flattened = []
    flatten_tree(flattened, root)

    changes = []
    for node in flattened:
        is_root = node.label == root.label
        parent_label = amendment.get('parent_label') if is_root else None
        changes.append(format_node(node, amendment, parent_label))

    def is_put(change):
        return any(entry['action'] == 'PUT' for entry in change.values())

    for change in [c for c in changes if is_put(c)]:
        # Each change dict is known to hold exactly one key: the label
        label = list(change)[0]
        node = struct.find(root, label)
        stripped = node.text.strip()
        # Drop the leading marker by length, then any whitespace after it
        body = stripped[len(marker_of(node)):].strip()

        # Only stars, but not the root: explicitly try to keep this node
        if body == '* * *':
            change[label]['action'] = Verb.KEEP

        # A root intro ending with a colon and followed by a STARS element
        # is treated as a change to the intro text only
        ends_with_colon = body[-1:] == ':'
        if (ends_with_colon and node.label == root.label
                and node.source_xml is not None):
            sibling = node.source_xml.getnext()
            if sibling is not None and sibling.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes

示例#12

0

显示文件

文件： changes.py 项目： tadhg-ohiggins/regulations-parser

def create_add_amendment(amendment):
    """Break an amendment's node tree into one change per node.

    Each node collected by ``flatten_tree`` is converted with
    ``format_node``; the amendment's ``'parent_label'`` (if any) is passed
    along only for the root node. For changes whose action is ``'PUT'``,
    star markers in the node text adjust the change's action or field.

    :param amendment: mapping with a ``'node'`` entry (the root node) and
        optionally a ``'parent_label'`` entry.
    :returns: the list of change dicts, one per flattened node.
    """
    root = amendment['node']

    nodes_list = []
    flatten_tree(nodes_list, root)

    changes = []
    for node in nodes_list:
        if node.label == root.label:
            # Only the root carries the amendment's parent label
            parent_label = amendment.get('parent_label')
        else:
            parent_label = None
        changes.append(format_node(node, amendment, parent_label))

    puts = []
    for candidate in changes:
        if any(v['action'] == 'PUT' for v in candidate.values()):
            puts.append(candidate)

    for change in puts:
        # Each change dict is known to hold exactly one key: the label
        label = list(change.keys())[0]
        node = struct.find(root, label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()

        if text == '* * *':
            # Only stars, but not the root: explicitly try to keep this
            # node. Such text cannot also end with a colon, so move on.
            change[label]['action'] = Verb.KEEP
            continue

        # A root intro ending with a colon and followed by a STARS element
        # is treated as a change to the intro text only
        touches_intro = (text[-1:] == ':' and node.label == root.label and
                         node.source_xml is not None)
        if not touches_intro:
            continue
        following = node.source_xml.getnext()
        if following is not None and following.tag == 'STARS':
            change[label]['field'] = '[text]'

    return changes