def keyterm_in_node(cls, node, ignore_definitions=True):
    """Return the keyterm found in ``node``'s tagged text, if any.

    The first occurrence of the node's marker (as reported by
    ``marker_of``) is removed from the tagged text, the result is stripped
    and handed to ``keyterm_in_text``. A node whose ``tagged_text`` is
    missing, ``None`` or empty is treated as having empty text.

    When ``ignore_definitions`` is true, a keyterm for which
    ``cls.is_definition(node, keyterm)`` is truthy is not returned.
    Returns ``None`` when there is nothing to report.
    """
    # Not every node is guaranteed to carry tagged text; fall back to an
    # empty string instead of raising on a missing/None attribute.
    tagged = getattr(node, 'tagged_text', None) or ''
    tagged = tagged.replace(marker_of(node), '', 1).strip()
    keyterm = keyterm_in_text(tagged)
    if not keyterm:
        return None
    if ignore_definitions and cls.is_definition(node, keyterm):
        return None
    return keyterm
def process_node_text(node):
    """Return the paragraph's tagged text without its marker.

    Only the first occurrence of the marker is dropped; leading and
    trailing whitespace is stripped from what remains.
    """
    paragraph_marker = marker_of(node)
    return node.tagged_text.replace(paragraph_marker, '', 1).strip()
def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of per-node changes.

    An amendment comes in with a whole tree structure under
    ``amendment['node']``. The tree is flattened with ``flatten_tree`` and
    every node is passed through ``format_node`` so that each resulting
    change acts on exactly one node.

    Changes whose action is ``'PUT'`` are then adjusted when stars are
    present:

    * if the node's text (minus its marker) is exactly ``* * *`` the
      action becomes ``Verb.KEEP``;
    * if the node has the same label as the amendment's root node, its
      text ends with a colon, and the next sibling of its source XML is a
      ``STARS`` element, the change's ``field`` is set to ``'[text]'``
      (only the intro text is being modified).

    Returns the list of change dictionaries.
    """
    root = amendment['node']
    nodes_list = []
    flatten_tree(nodes_list, root)
    changes = [format_node(n, amendment) for n in nodes_list]

    # Each change is expected to hold a single label -> details entry.
    # Dict views cannot be indexed on Python 3, so materialize them with
    # list() before taking the first element (works on Python 2 as well).
    puts = [c for c in changes if list(c.values())[0]['action'] == 'PUT']
    for change in puts:
        label = list(change.keys())[0]
        node = struct.find(root, label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()

        # Text is nothing but stars: explicitly try to keep this node
        # rather than overwrite it.
        if text == '* * *':
            change[label]['action'] = Verb.KEEP

        # Text ends with a colon and is followed by stars: assume only
        # the intro text of the root node is being modified.
        if (text[-1:] == ':' and node.label == root.label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes
def create_add_amendment(amendment):
    """Turn an amendment's node tree into a flat list of per-node changes.

    An amendment comes in with a whole tree structure under
    ``amendment['node']``. The tree is flattened with ``flatten_tree`` and
    every node is passed through ``format_node`` so that each resulting
    change acts on exactly one node.

    Changes whose action is ``'PUT'`` are then adjusted when stars are
    present:

    * if the node's text (minus its marker) is exactly ``* * *`` the
      action becomes ``'KEEP'``;
    * if the node has the same label as the amendment's root node, its
      text ends with a colon, and the next sibling of its source XML is a
      ``STARS`` element, the change's ``field`` is set to ``'[text]'``
      (only the intro text is being modified).

    Returns the list of change dictionaries.
    """
    root = amendment['node']
    nodes_list = []
    flatten_tree(nodes_list, root)
    changes = [format_node(n, amendment) for n in nodes_list]

    # Each change is expected to hold a single label -> details entry.
    # Dict views cannot be indexed on Python 3, so materialize them with
    # list() before taking the first element (works on Python 2 as well).
    puts = [c for c in changes if list(c.values())[0]['action'] == 'PUT']
    for change in puts:
        label = list(change.keys())[0]
        node = struct.find(root, label)
        text = node.text.strip()
        marker = marker_of(node)
        text = text[len(marker):].strip()

        # Text is nothing but stars: explicitly try to keep this node
        # rather than overwrite it.
        if text == '* * *':
            change[label]['action'] = 'KEEP'

        # Text ends with a colon and is followed by stars: assume only
        # the intro text of the root node is being modified.
        if (text[-1:] == ':' and node.label == root.label
                and node.source_xml is not None):
            following = node.source_xml.getnext()
            if following is not None and following.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes
def create_add_amendment(amendment):
    """Flatten an amendment's node tree into one formatted entry per node.

    The tree under ``amendment['node']`` is flattened and each node is run
    through ``format_node``, so that every entry acts on a single node.
    The entry for the root node may additionally get a ``field`` when
    stars are present:

    * root text (minus its marker) is exactly ``* * *`` -> ``'[children]'``
    * root text ends with a colon and the element following the root's
      source XML is ``STARS`` -> ``'[text]'``
    """
    root = amendment['node']
    flattened = []
    flatten_tree(flattened, root)
    nodes = [format_node(each, amendment) for each in flattened]

    root_label = root.label_id()
    containing_root = [entry for entry in nodes if root_label in entry]
    root_change = containing_root[0][root_label]

    stripped = root.text.strip()
    body = stripped[len(marker_of(root)):].strip()

    # Only stars, yet marked as edited -- assume just the child
    # paragraphs are being modified
    if body == '* * *':
        root_change['field'] = '[children]'

    # Intro text ending in a colon and followed by stars -- assume just
    # the intro text is being modified
    if body.endswith(':') and root.source_xml is not None:
        sibling = root.source_xml.getnext()
        if sibling is not None and sibling.tag == 'STARS':
            root_change['field'] = '[text]'

    return nodes
def keyterm_in_node(cls, node, ignore_definitions=True):
    """Look for a keyterm in the node's tagged text.

    A missing, ``None`` or empty ``tagged_text`` counts as empty text. The
    first occurrence of the node's marker is removed and the remainder is
    stripped before being searched with ``keyterm_in_text``. With
    ``ignore_definitions`` set, a keyterm that ``cls.is_definition``
    accepts is suppressed. Returns the keyterm or ``None``.
    """
    raw = getattr(node, 'tagged_text', None)
    if not raw:
        raw = ''
    without_marker = raw.replace(marker_of(node), '', 1).strip()

    found = keyterm_in_text(without_marker)
    if not found:
        return None
    if ignore_definitions and cls.is_definition(node, found):
        return None
    return found
def test_marker_of_range(self):
    """Markers may span a range, not just a single paragraph.

    Besides single paragraph markers, text can begin with several markers
    at once. This has been seen with "Reserved" paragraphs, though other
    scenarios are likely."""
    cases = [
        ('(b) - (d) Reserved', '(b) - (d)'),
        ('(b)-(d) Some Words', '(b)-(d)'),
        ('b. - d. Can be ignored', 'b. - d.'),
        ('b.-d. Has no negative numbers', 'b.-d.'),
        ('(b) -1.0 is negative', '(b)'),
    ]
    for paragraph, expected in cases:
        node = Node(text=paragraph, label=['b'])
        self.assertEqual(expected, marker_of(node))
def overwrite_marker(origin, new_label):
    """Swap the marker in ``origin.text`` for one built from ``new_label``.

    The node already has a label but is being given a new one, which is
    necessary during node moves. A marker containing a parenthesis is
    replaced with ``(new_label)``; any other non-empty marker with
    ``new_label.``. When no marker is found a warning is logged and the
    text is left untouched. The node itself is returned.
    """
    marker = marker_of(origin)
    parenthesized = '(' in marker
    if not parenthesized and not marker:
        logger.warning("Cannot replace marker in %s", origin.text)
        return origin

    template = '({0})' if parenthesized else '{0}.'
    # Only the first occurrence of the old marker is rewritten
    origin.text = origin.text.replace(marker, template.format(new_label), 1)
    return origin
def create_add_amendment(amendment):
    """Flatten an amendment's node tree into a list of per-node changes.

    The tree under ``amendment['node']`` is flattened and every node is
    converted with ``format_node`` so that each change acts on one node.
    Only the root node is formatted with the amendment's ``parent_label``
    (if present); all other nodes get ``None``.

    ``'PUT'`` changes are then tweaked when stars are present: a node
    whose text is just ``* * *`` is switched to ``Verb.KEEP``, and a root
    node whose text ends in a colon and is followed by a ``STARS``
    element has its ``field`` set to ``'[text]'``.
    """
    root = amendment['node']
    flattened = []
    flatten_tree(flattened, root)

    changes = [
        format_node(
            node, amendment,
            amendment.get('parent_label') if node.label == root.label
            else None)
        for node in flattened
    ]

    put_changes = [
        candidate for candidate in changes
        if any(details['action'] == 'PUT' for details in candidate.values())
    ]
    for change in put_changes:
        # Awkward, but each "changes" dictionary is known to hold exactly
        # _one_ key
        label = next(iter(change))
        node = struct.find(root, label)
        body = node.text.strip()
        body = body[len(marker_of(node)):].strip()

        # Nothing but stars: explicitly try to keep this node
        if body == '* * *':
            change[label]['action'] = Verb.KEEP

        # Colon-terminated root text followed by stars: assume only the
        # intro text is being modified
        is_intro_candidate = (
            body.endswith(':')
            and node.label == root.label
            and node.source_xml is not None
        )
        if is_intro_candidate:
            sibling = node.source_xml.getnext()
            if sibling is not None and sibling.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes
def create_add_amendment(amendment):
    """Break an amendment's tree apart into one change per node.

    ``amendment['node']`` is flattened and each node is converted with
    ``format_node``; the root node is additionally given the amendment's
    ``parent_label`` (when one is set), every other node gets ``None``.

    Afterwards, changes with a ``'PUT'`` action are adjusted for stars:
    text consisting solely of ``* * *`` turns the action into
    ``Verb.KEEP``; root text ending with a colon and followed by a
    ``STARS`` element sets the change's ``field`` to ``'[text]'``.
    """
    root = amendment['node']

    def parent_label_for(node):
        # Only the root of the amendment carries the parent label
        if node.label == root.label:
            return amendment.get('parent_label')
        return None

    def is_put(change):
        return any(entry['action'] == 'PUT' for entry in change.values())

    flattened = []
    flatten_tree(flattened, root)
    changes = [format_node(node, amendment, parent_label_for(node))
               for node in flattened]

    for change in list(filter(is_put, changes)):
        # There is only ever _one_ key in a "changes" dictionary
        label = next(iter(change))
        node = struct.find(root, label)
        remainder = node.text.strip()
        remainder = remainder[len(marker_of(node)):].strip()

        # Stars only -- explicitly try to keep this node
        if remainder == '* * *':
            change[label]['action'] = Verb.KEEP

        # Root text ending in a colon with stars right after it -- assume
        # only the intro text is being modified
        if (remainder.endswith(':') and node.label == root.label
                and node.source_xml is not None):
            next_element = node.source_xml.getnext()
            if next_element is not None and next_element.tag == 'STARS':
                change[label]['field'] = '[text]'

    return changes