Пример #1
0
def main():
    """Build one XML document from the input .docx file(s) and write it to stdout.

    Command-line arguments:
        sys.argv[1]: path to a directory, in which case every entry whose
            name ends in ``.docx`` is processed, or any other path, which
            is handed to ``parse_file`` as a single input file.
        sys.argv[2]: optional value passed to ``_make_node`` for the
            ``recency`` node; ``"xxx"`` is used when it is omitted.

    Raises:
        IndexError: if no input path was given on the command line.
    """
    # Form the output DOM.
    dom = etree.Element("level")
    dom.set("type", "document")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    _make_node(meta, "recency", sys.argv[2] if len(sys.argv) > 2 else "xxx")

    source = sys.argv[1]
    try:
        all_file_names = os.listdir(source)
    except NotADirectoryError:
        # The argument names a single file rather than a directory.
        file_paths = [source]
    else:
        # os.listdir() returns names in arbitrary order. The files are parsed
        # one after another with a running paragraph index, so sort the names
        # (lexicographically) to make the output reproducible between runs.
        file_paths = [
            os.path.join(source, fn) for fn in sorted(all_file_names)
            if fn.endswith('.docx')
        ]

    # The value returned for one file is the starting index for the next.
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(
        etree.tostring(dom,
                       pretty_print=True,
                       encoding="utf-8",
                       xml_declaration=True))
Пример #2
0
def main():
    """Build one XML document from the input .docx file(s) and write it to stdout.

    Command-line arguments:
        sys.argv[1]: path to a directory, in which case every entry whose
            name ends in ``.docx`` is processed, or any other path, which
            is handed to ``parse_file`` as a single input file.
        sys.argv[2]: optional XML string that is parsed and appended to the
            ``meta`` node; a built-in ``<recency>`` fragment is used when
            it is omitted.

    Raises:
        IndexError: if no input path was given on the command line.
    """
    # Form the output DOM.
    dom = etree.Element("code")
    _make_node(dom, "heading", "Code of the District of Columbia")
    meta = _make_node(dom, "meta", None)
    recency = etree.fromstring(sys.argv[2] if len(sys.argv) > 2 else """
    <recency>
      <law>
        <law>20-241</law>
        <effective>2015-04-13</effective>
      </law>
      <emergency>
        <law>20-617</law>
        <effective>2015-01-28</effective>
      </emergency>
      <federal>
        <law>113-235</law>
        <effective>2014-12-16</effective>
      </federal>
    </recency>
""")
    meta.append(recency)

    source = sys.argv[1]
    try:
        all_file_names = os.listdir(source)
    except NotADirectoryError:
        # The argument names a single file rather than a directory.
        file_paths = [source]
    else:
        # os.listdir() returns names in arbitrary order. The files are parsed
        # one after another with a running paragraph index, so sort the names
        # (lexicographically) to make the output reproducible between runs.
        file_paths = [
            os.path.join(source, fn) for fn in sorted(all_file_names)
            if fn.endswith('.docx')
        ]

    # The value returned for one file is the starting index for the next.
    start_para_index = 0
    for fp in file_paths:
        start_para_index = parse_file(dom, fp, start_para_index)

    # Output, being careful we get UTF-8 to the byte stream.
    sys.stdout.buffer.write(
        etree.tostring(dom,
                       pretty_print=True,
                       encoding="utf-8",
                       xml_declaration=True))
def main():
	"""Build one XML document from the input .docx file(s) and write it to stdout.

	Command-line arguments:
		sys.argv[1]: path to a directory, in which case every entry whose
			name ends in ``.docx`` is processed, or any other path, which
			is handed to ``parse_file`` as a single input file.
		sys.argv[2]: optional value passed to ``_make_node`` for the
			``recency`` node; ``"xxx"`` is used when it is omitted.

	Raises:
		IndexError: if no input path was given on the command line.
	"""
	# Form the output DOM.
	dom = etree.Element("level")
	dom.set("type", "document")
	_make_node(dom, "heading", "Code of the District of Columbia")
	meta = _make_node(dom, "meta", None)
	_make_node(meta, "recency", sys.argv[2] if len(sys.argv) > 2 else "xxx")

	source = sys.argv[1]
	try:
		all_file_names = os.listdir(source)
	except NotADirectoryError:
		# The argument names a single file rather than a directory.
		file_paths = [source]
	else:
		# os.listdir() returns names in arbitrary order. The files are parsed
		# one after another with a running paragraph index, so sort the names
		# (lexicographically) to make the output reproducible between runs.
		file_paths = [os.path.join(source, fn) for fn in sorted(all_file_names) if fn.endswith('.docx')]

	# The value returned for one file is the starting index for the next.
	start_para_index = 0
	for fp in file_paths:
		start_para_index = parse_file(dom, fp, start_para_index)

	# Output, being careful we get UTF-8 to the byte stream.
	sys.stdout.buffer.write(etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))
def main():
	"""Build one XML document from the input .docx file(s) and write it to stdout.

	Command-line arguments:
		sys.argv[1]: path to a directory, in which case every entry whose
			name ends in ``.docx`` is processed, or any other path, which
			is handed to ``parse_file`` as a single input file.
		sys.argv[2]: optional XML string that is parsed and appended to the
			``meta`` node; a built-in ``<recency>`` fragment is used when
			it is omitted.

	Raises:
		IndexError: if no input path was given on the command line.
	"""
	# Form the output DOM.
	dom = etree.Element("code")
	_make_node(dom, "heading", "Code of the District of Columbia")
	meta = _make_node(dom, "meta", None)
	recency = etree.fromstring(sys.argv[2] if len(sys.argv) > 2 else """
    <recency>
      <law>
        <law>20-241</law>
        <effective>2015-04-13</effective>
      </law>
      <emergency>
        <law>20-617</law>
        <effective>2015-01-28</effective>
      </emergency>
      <federal>
        <law>113-235</law>
        <effective>2014-12-16</effective>
      </federal>
    </recency>
""")
	meta.append(recency)

	source = sys.argv[1]
	try:
		all_file_names = os.listdir(source)
	except NotADirectoryError:
		# The argument names a single file rather than a directory.
		file_paths = [source]
	else:
		# os.listdir() returns names in arbitrary order. The files are parsed
		# one after another with a running paragraph index, so sort the names
		# (lexicographically) to make the output reproducible between runs.
		file_paths = [os.path.join(source, fn) for fn in sorted(all_file_names) if fn.endswith('.docx')]

	# The value returned for one file is the starting index for the next.
	start_para_index = 0
	for fp in file_paths:
		start_para_index = parse_file(dom, fp, start_para_index)

	# Output, being careful we get UTF-8 to the byte stream.
	sys.stdout.buffer.write(etree.tostring(dom, pretty_print=True, encoding="utf-8", xml_declaration=True))