def main():
    """Build the D.C. Code XML document from .docx input and write it to stdout.

    Command-line arguments (read from ``sys.argv``):
        1. Path to a single input file, or to a directory; in the directory
           case every entry whose name ends in ``.docx`` is processed.
        2. Optional recency text, handed to ``_make_node`` as the content of
           a ``recency`` node under ``meta``. Defaults to ``"xxx"``.

    The serialized document is written as UTF-8 bytes to ``sys.stdout.buffer``.

    Raises:
        SystemExit: if the input path argument is missing.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: <script> DOCX_FILE_OR_DIRECTORY [RECENCY]")

    # Form the output DOM.
    dom = etree.Element("level")
    dom.set("type", "document")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    _make_node(meta, "recency", sys.argv[2] if len(sys.argv) > 2 else "xxx")

    DIR = sys.argv[1]
    try:
        all_file_names = os.listdir(DIR)
    except NotADirectoryError:
        # The argument names a single file rather than a directory.
        file_paths = [DIR]
    else:
        # os.listdir() returns names in arbitrary order. Sort them so the
        # document order (and the running paragraph index) is reproducible
        # from run to run. NOTE: this is a plain lexicographic sort.
        file_paths = [
            os.path.join(DIR, fn)
            for fn in sorted(all_file_names)
            if fn.endswith('.docx')
        ]

    # Each call receives the index returned by the previous call
    # (presumably the next free paragraph index -- confirm in parse_file).
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(
        etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))
def main():
    """Build the D.C. Code XML document from .docx input and write it to stdout.

    Command-line arguments (read from ``sys.argv``):
        1. Path to a single input file, or to a directory; in the directory
           case every entry whose name ends in ``.docx`` is processed.
        2. Optional XML string that is parsed and appended to the ``meta``
           node. When omitted, a built-in ``<recency>`` element is used.

    The serialized document is written as UTF-8 bytes to ``sys.stdout.buffer``.

    Raises:
        SystemExit: if the input path argument is missing.
    """
    if len(sys.argv) < 2:
        # Fail with a readable message instead of a bare IndexError.
        sys.exit("usage: <script> DOCX_FILE_OR_DIRECTORY [RECENCY_XML]")

    # Form the output DOM.
    dom = etree.Element("code")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    recency = etree.fromstring(sys.argv[2] if len(sys.argv) > 2 else """
        <recency>
            <law>
                <law>20-241</law>
                <effective>2015-04-13</effective>
            </law>
            <emergency>
                <law>20-617</law>
                <effective>2015-01-28</effective>
            </emergency>
            <federal>
                <law>113-235</law>
                <effective>2014-12-16</effective>
            </federal>
        </recency>
    """)
    meta.append(recency)

    DIR = sys.argv[1]
    try:
        all_file_names = os.listdir(DIR)
    except NotADirectoryError:
        # The argument names a single file rather than a directory.
        file_paths = [DIR]
    else:
        # os.listdir() returns names in arbitrary order. Sort them so the
        # document order (and the running paragraph index) is reproducible
        # from run to run. NOTE: this is a plain lexicographic sort.
        file_paths = [
            os.path.join(DIR, fn)
            for fn in sorted(all_file_names)
            if fn.endswith('.docx')
        ]

    # Each call receives the index returned by the previous call
    # (presumably the next free paragraph index -- confirm in parse_file).
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(
        etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))
def main():
    """Convert .docx input into the D.C. Code XML document on stdout.

    Reads its inputs from ``sys.argv``:

    * ``sys.argv[1]`` -- a single input file, or a directory from which
      every entry ending in ``.docx`` is taken.
    * ``sys.argv[2]`` (optional) -- recency text passed to ``_make_node``
      for the ``recency`` node under ``meta``; ``"xxx"`` when absent.

    Output is the serialized tree, written as UTF-8 bytes to
    ``sys.stdout.buffer``.

    Raises:
        SystemExit: if ``sys.argv[1]`` is not supplied.
    """
    if len(sys.argv) < 2:
        # A usage message is friendlier than the IndexError this used to raise.
        sys.exit("usage: <script> DOCX_FILE_OR_DIRECTORY [RECENCY]")

    # Form the output DOM.
    dom = etree.Element("level")
    dom.set("type", "document")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    _make_node(meta, "recency", sys.argv[2] if len(sys.argv) > 2 else "xxx")

    DIR = sys.argv[1]
    try:
        all_file_names = os.listdir(DIR)
    except NotADirectoryError:
        # Not a directory: treat the argument itself as the one input file.
        file_paths = [DIR]
    else:
        # Sorted because os.listdir() makes no ordering guarantee; without
        # it the output order could differ between runs or machines.
        # NOTE: lexicographic order, not numeric.
        file_paths = [os.path.join(DIR, fn) for fn in sorted(all_file_names) if fn.endswith('.docx')]

    # Thread the value returned by parse_file into the next call
    # (presumably a running paragraph counter -- confirm in parse_file).
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))
def main():
    """Convert .docx input into the D.C. Code XML document on stdout.

    Reads its inputs from ``sys.argv``:

    * ``sys.argv[1]`` -- a single input file, or a directory from which
      every entry ending in ``.docx`` is taken.
    * ``sys.argv[2]`` (optional) -- an XML string that is parsed and
      appended to the ``meta`` node; a built-in ``<recency>`` element is
      used when absent.

    Output is the serialized tree, written as UTF-8 bytes to
    ``sys.stdout.buffer``.

    Raises:
        SystemExit: if ``sys.argv[1]`` is not supplied.
    """
    if len(sys.argv) < 2:
        # A usage message is friendlier than the IndexError this used to raise.
        sys.exit("usage: <script> DOCX_FILE_OR_DIRECTORY [RECENCY_XML]")

    # Form the output DOM.
    dom = etree.Element("code")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    recency = etree.fromstring(sys.argv[2] if len(sys.argv) > 2 else """
        <recency>
            <law>
                <law>20-241</law>
                <effective>2015-04-13</effective>
            </law>
            <emergency>
                <law>20-617</law>
                <effective>2015-01-28</effective>
            </emergency>
            <federal>
                <law>113-235</law>
                <effective>2014-12-16</effective>
            </federal>
        </recency>
    """)
    meta.append(recency)

    DIR = sys.argv[1]
    try:
        all_file_names = os.listdir(DIR)
    except NotADirectoryError:
        # Not a directory: treat the argument itself as the one input file.
        file_paths = [DIR]
    else:
        # Sorted because os.listdir() makes no ordering guarantee; without
        # it the output order could differ between runs or machines.
        # NOTE: lexicographic order, not numeric.
        file_paths = [os.path.join(DIR, fn) for fn in sorted(all_file_names) if fn.endswith('.docx')]

    # Thread the value returned by parse_file into the next call
    # (presumably a running paragraph counter -- confirm in parse_file).
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))