Пример #1
0
def extract_revision_text(remark_el):
    """Render a revision remark element as plain text.

    The result depends on the shape of ``remark_el``:

    * no child elements: the text of ``remark_el`` itself, or ``None``
      when that text is empty;
    * exactly one child and it is a ``<p>``: the text of that paragraph;
    * child elements present while the remark's own text is non-blank:
      ``None`` (such a remark is not rendered);
    * otherwise: the texts of all ``<p>`` children joined by newlines.
      Children with any other tag are skipped, so the result is an empty
      string when there is no ``<p>`` child at all.

    NOTE(review): the third case presumably relies on ``minidom_get_text``
    reporting only the element's own text rather than that of its
    descendants -- confirm against the helper.
    """
    children = minidom_children(remark_el)
    child_count = len(children)

    # A remark without child elements is treated as plain text.
    if child_count == 0:
        return minidom_get_text(remark_el) or None

    # The common case: the remark wraps a single paragraph.
    if child_count == 1 and children[0].tagName == "p":
        return minidom_get_text(children[0])

    # Text next to child elements: give up instead of guessing a layout.
    if minidom_get_text(remark_el).strip():
        return None

    paragraphs = [
        minidom_get_text(node) for node in children if node.tagName == "p"
    ]
    return "\n".join(paragraphs)
Пример #2
0
def extract_revision_text(remark_el):
    """Render a revision remark element as plain text.

    The result depends on the shape of ``remark_el``:

    * no child elements: the text of ``remark_el`` itself, or ``None``
      when that text is empty;
    * exactly one child and it is a ``<p>``: the text of that paragraph;
    * child elements present while the remark's own text is non-blank:
      ``None`` (such a remark is not rendered);
    * otherwise: one output line per ``<p>`` child and one ``"* "``-prefixed
      line per ``<li>`` found directly inside a ``<ul>`` child, joined by
      newlines in document order. Any other tag is skipped, so the result
      may be an empty string.

    NOTE(review): the third case presumably relies on ``minidom_get_text``
    reporting only the element's own text rather than that of its
    descendants -- confirm against the helper.
    """
    children = minidom_children(remark_el)
    child_count = len(children)

    # A remark without child elements is treated as plain text.
    if child_count == 0:
        return minidom_get_text(remark_el) or None

    # The common case: the remark wraps a single paragraph.
    if child_count == 1 and children[0].tagName == "p":
        return minidom_get_text(children[0])

    # Text next to child elements: give up instead of guessing a layout.
    if minidom_get_text(remark_el).strip():
        return None

    rendered = []
    for node in children:
        tag = node.tagName
        if tag == "p":
            rendered.append(minidom_get_text(node))
            continue
        if tag != "ul":
            # Only paragraphs and unordered lists contribute output.
            continue
        # Each list item becomes its own bullet line.
        rendered.extend(
            "* {}".format(minidom_get_text(item))
            for item in minidom_children(node)
            if item.tagName == "li"
        )

    return "\n".join(rendered)
Пример #3
0
def extract_xep_metadata(document):
    """Collect the header metadata of a XEP document into a dict.

    The header is located with ``minidom_find_header(document)`` and the
    following children are read from it:

    * ``revision``: the child returned by ``minidom_find_child`` is used
      as the latest revision (presumably the first one -- confirm against
      the helper). Its ``version`` and ``date`` are always read; the
      ``remark`` is rendered through ``extract_revision_text``. The
      ``initials`` are only looked up when a remark could be rendered.
      Without a revision all four values are ``None``.
    * ``status``, ``type``, ``abstract``, ``shortname`` and ``title`` are
      read unconditionally. Whitespace runs in the abstract are collapsed
      to single spaces. A shortname that is merely a placeholder (such as
      "to be assigned") is reported as ``None``.
    * ``sig`` and ``lastcall`` are optional and default to ``None``.
    * ``approver`` is optional; when absent it defaults to ``"Board"``
      for type ``"Procedural"`` and to ``"Council"`` otherwise.

    Returns a dict with the keys ``last_revision`` (itself a dict with
    ``version``, ``date``, ``initials`` and ``remark``), ``status``,
    ``type``, ``sig``, ``abstract``, ``shortname``, ``title``,
    ``approver`` and ``last_call``.
    """
    header = minidom_find_header(document)

    def text_of(tag, parent=header):
        # Text of a child that is expected to exist below ``parent``.
        return minidom_get_text(minidom_find_child(parent, tag))

    def optional_text_of(tag):
        # Text of a header child, or None when the child is absent.
        node = minidom_find_child(header, tag)
        return None if node is None else minidom_get_text(node)

    # Revision details stay None unless a revision element is present.
    version = date = remark = initials = None
    revision = minidom_find_child(header, "revision")
    if revision is not None:
        version = text_of("version", revision)
        date = text_of("date", revision)
        remark_el = minidom_find_child(revision, "remark")
        if remark_el is not None:
            remark = extract_revision_text(remark_el)
        if remark is not None:
            # Initials are only reported alongside a rendered remark.
            initials_el = minidom_find_child(revision, "initials")
            initials = initials_el and minidom_get_text(initials_el)

    status = text_of("status")
    type_ = text_of("type")
    abstract = " ".join(text_of("abstract").split())
    sig = optional_text_of("sig")

    shortname = text_of("shortname")
    placeholder_names = (
        "not yet assigned",
        "n/a",
        "none",
        "to be assigned",
        "to be issued",
    )
    # Compare case-insensitively and treat "-" and "_" like spaces.
    normalized = shortname.replace("-", " ").replace("_", " ").lower()
    if normalized in placeholder_names:
        shortname = None

    title = text_of("title")

    approver_el = minidom_find_child(header, "approver")
    if approver_el is None:
        # No explicit approver: derive it from the XEP type.
        approver = "Council" if type_ != "Procedural" else "Board"
    else:
        approver = minidom_get_text(approver_el)

    last_call = optional_text_of("lastcall")

    return {
        "last_revision": {
            "version": version,
            "date": date,
            "initials": initials,
            "remark": remark,
        },
        "status": status,
        "type": type_,
        "sig": sig,
        "abstract": abstract,
        "shortname": shortname,
        "title": title,
        "approver": approver,
        "last_call": last_call,
    }
Пример #4
0
def extract_xep_metadata(document):
    """Extract the metadata stored in the header of a XEP document.

    Reads from the element returned by ``minidom_find_header(document)``:

    * the ``revision`` child found by ``minidom_find_child`` (presumably
      the first, i.e. latest, one -- confirm against the helper): its
      ``version``, ``date``, the ``remark`` rendered via
      ``extract_revision_text`` and, only if a remark was rendered, its
      ``initials``. All four are ``None`` when there is no revision.
    * ``status``, ``type``, ``abstract`` (with whitespace runs collapsed
      to single spaces), ``shortname`` (``None`` for placeholder values
      such as "n/a") and ``title``.
    * the optional ``sig`` and ``lastcall`` children (``None`` if absent).
    * the optional ``approver`` child; if absent, ``"Board"`` is used for
      type ``"Procedural"`` and ``"Council"`` for every other type.

    Returns a dict with the keys ``last_revision`` (a nested dict with
    ``version``, ``date``, ``initials``, ``remark``), ``status``, ``type``,
    ``sig``, ``abstract``, ``shortname``, ``title``, ``approver`` and
    ``last_call``.
    """
    header = minidom_find_header(document)

    # Start from "nothing known" and fill in what the revision provides.
    revision_info = {
        "version": None,
        "date": None,
        "initials": None,
        "remark": None,
    }
    revision_el = minidom_find_child(header, "revision")
    if revision_el is not None:
        version_el = minidom_find_child(revision_el, "version")
        revision_info["version"] = minidom_get_text(version_el)
        date_el = minidom_find_child(revision_el, "date")
        revision_info["date"] = minidom_get_text(date_el)

        remark_el = minidom_find_child(revision_el, "remark")
        if remark_el is not None:
            revision_info["remark"] = extract_revision_text(remark_el)

        # Initials are only looked up when the remark could be rendered.
        if revision_info["remark"] is not None:
            initials_el = minidom_find_child(revision_el, "initials")
            revision_info["initials"] = (
                initials_el and minidom_get_text(initials_el)
            )

    status = minidom_get_text(minidom_find_child(header, "status"))
    type_ = minidom_get_text(minidom_find_child(header, "type"))

    raw_abstract = minidom_get_text(minidom_find_child(header, "abstract"))
    abstract = " ".join(raw_abstract.split())

    sig_el = minidom_find_child(header, "sig")
    sig = minidom_get_text(sig_el) if sig_el is not None else None

    shortname = minidom_get_text(minidom_find_child(header, "shortname"))
    unassigned_markers = (
        "not yet assigned",
        "n/a",
        "none",
        "to be assigned",
        "to be issued",
    )
    # Hyphens and underscores count as spaces; the match ignores case.
    comparable = shortname.replace("-", " ").replace("_", " ").lower()
    if comparable in unassigned_markers:
        shortname = None

    title = minidom_get_text(minidom_find_child(header, "title"))

    approver_el = minidom_find_child(header, "approver")
    if approver_el is not None:
        approver = minidom_get_text(approver_el)
    elif type_ == "Procedural":
        # Fallback when the header names no approver.
        approver = "Board"
    else:
        approver = "Council"

    last_call_el = minidom_find_child(header, "lastcall")
    last_call = (
        minidom_get_text(last_call_el) if last_call_el is not None else None
    )

    return {
        "last_revision": revision_info,
        "status": status,
        "type": type_,
        "sig": sig,
        "abstract": abstract,
        "shortname": shortname,
        "title": title,
        "approver": approver,
        "last_call": last_call,
    }