from regparser import api_stub
from regparser.citations import internal_citations, Label
from regparser.grammar.external_citations import regtext_external_citation
from regparser.layer import key_terms
from regparser.tree import struct


def generate_key_terms_layer(xml_based_reg_json):
    """Build the key terms layer for a regulation.

    Wraps ``xml_based_reg_json`` in a ``key_terms.KeyTerms`` layer generator
    and returns whatever that generator's ``build()`` produces.
    """
    layer_generator = key_terms.KeyTerms(xml_based_reg_json)
    return layer_generator.build()


# We're not going to use our heuristic to determine key terms for paragraphs
# where this has already been done properly.
xml_based_reg = api_stub.get_regulation_as_json('/tmp/xtree.json')
real_key_terms_layer = generate_key_terms_layer(xml_based_reg)

# Filled in by generate_keyterm, keyed by node label id.
layer = {}
part_end = '1005.'
# NOTE(review): neither `Literal` nor `re` is imported in the lines visible
# here -- confirm both are imported earlier in the file.
exclude_parser = (regtext_external_citation | Literal("U.S."))
period = re.compile(r'\.(?!,)')  # Not followed by a comma


def generate_keyterm(node):
    # NOTE(review): only the opening of this function is visible; the body
    # continues past the last statement shown below.
    label_id = node.label_id()
    if label_id in real_key_terms_layer:
        # Reuse the entry from the layer built above for this label.
        layer[label_id] = real_key_terms_layer[label_id]
    else:
        node_text = key_terms.KeyTerms.process_node_text(node)
        if not node_text:
print NodeEncoder().encode(toc) def generate_interpretations(reg): """ Generate the Interpretations layer """ layer_generator = interpretations.Interpretations(reg) print NodeEncoder().encode(layer_generator.build()) def generate_terms(reg): """ Generate the Terms layer """ layer_generator = terms.Terms(reg) print NodeEncoder().encode(layer_generator.build()) def generate_key_terms(reg): """ Generate the key terms layer """ layer_generator = key_terms.KeyTerms(reg) layer_generator.build() print NodeEncoder().encode(layer_generator.build()) if __name__ == "__main__": reg_json = api_stub.get_regulation_as_json('/tmp/xtree.json') # generate_table_of_contents(reg_json) # generate_internal_citations(reg_json) # generate_external_citations(reg_json) # generate_interpretations(reg_json) # generate_terms(reg_json) generate_key_terms(reg_json)
from pyparsing import Literal
from regparser import api_stub
from regparser.citations import internal_citations, Label
from regparser.grammar.external_citations import regtext_external_citation
from regparser.layer import key_terms
from regparser.tree import struct


def generate_key_terms_layer(xml_based_reg_json):
    """Build the key terms layer for a regulation.

    Wraps ``xml_based_reg_json`` in a ``key_terms.KeyTerms`` layer generator
    and returns whatever that generator's ``build()`` produces.
    """
    layer_generator = key_terms.KeyTerms(xml_based_reg_json)
    return layer_generator.build()


# We're not going to use our heuristic to determine key terms for paragraphs
# where this has already been done properly.
xml_based_reg = api_stub.get_regulation_as_json('/tmp/xtree.json')
real_key_terms_layer = generate_key_terms_layer(xml_based_reg)

# Filled in by generate_keyterm, keyed by node label id.
layer = {}
part_end = '1005.'
exclude_parser = (
    regtext_external_citation
    | Literal("U.S.")
)
# NOTE(review): `re` is not imported in the lines visible here -- confirm it
# is imported earlier in the file.
period = re.compile(r'\.(?!,)')  # Not followed by a comma


def generate_keyterm(node):
    # NOTE(review): only the opening of this function is visible; the body
    # may continue past the last statement shown below.
    label_id = node.label_id()
    if label_id in real_key_terms_layer:
        # Reuse the entry from the layer built above for this label.
        layer[label_id] = real_key_terms_layer[label_id]
toc = layer_generator.build() print NodeEncoder().encode(toc) def generate_interpretations(reg): """ Generate the Interpretations layer """ layer_generator = interpretations.Interpretations(reg) print NodeEncoder().encode(layer_generator.build()) def generate_terms(reg): """ Generate the Terms layer """ layer_generator = terms.Terms(reg) print NodeEncoder().encode(layer_generator.build()) def generate_key_terms(reg): """ Generate the key terms layer """ layer_generator = key_terms.KeyTerms(reg) layer_generator.build() print NodeEncoder().encode(layer_generator.build()) if __name__ == "__main__": reg_json = api_stub.get_regulation_as_json('/tmp/xtree.json') # generate_table_of_contents(reg_json) # generate_internal_citations(reg_json) # generate_external_citations(reg_json) # generate_interpretations(reg_json) # generate_terms(reg_json) generate_key_terms(reg_json)