Пример #1
0
def conflict(soup):
    """Collect the ``fn`` nodes that carry conflict statements.

    Nodes whose ``fn-type`` attribute is ``conflict`` come first, followed
    by the nodes whose ``fn-type`` is ``COI-statement``.
    """
    found = extract_nodes(soup, "fn", attr="fn-type", value="conflict")
    coi_statements = extract_nodes(
        soup, "fn", attr="fn-type", value="COI-statement")
    found += coi_statements
    return found
Пример #2
0
def custom_meta(soup, meta_name=None):
    """Return ``custom-meta`` nodes, optionally restricted by meta name.

    When ``meta_name`` is not None, a node is kept only if the string
    contents of the first ``meta-name`` node found inside it equal
    ``meta_name``. With ``meta_name`` left as None every node is returned.
    """
    nodes = extract_nodes(soup, "custom-meta")
    if meta_name is None:
        return nodes

    matching = []
    for node in nodes:
        name_node = first(extract_nodes(node, "meta-name"))
        if node_contents_str(name_node) == meta_name:
            matching.append(node)
    return matching
Пример #3
0
def fn_group(soup, content_type=None):
    """Return ``fn-group`` nodes from ``soup``.

    A truthy ``content_type`` restricts the lookup to nodes whose
    ``content-type`` attribute has that value; otherwise no attribute
    filter is applied.
    """
    if not content_type:
        return extract_nodes(soup, "fn-group")
    return extract_nodes(
        soup, "fn-group", attr="content-type", value=content_type)
Пример #4
0
def pub_id(soup, pub_id_type=None):
    """Return ``pub-id`` nodes from ``soup``.

    A truthy ``pub_id_type`` restricts the lookup to nodes whose
    ``pub-id-type`` attribute has that value; otherwise no attribute
    filter is applied.
    """
    if not pub_id_type:
        return extract_nodes(soup, "pub-id")
    return extract_nodes(
        soup, "pub-id", attr="pub-id-type", value=pub_id_type)
Пример #5
0
def ext_link(soup, ext_link_type=None):
    """Return ``ext-link`` nodes from ``soup``.

    A truthy ``ext_link_type`` restricts the lookup to nodes whose
    ``ext-link-type`` attribute has that value; otherwise no attribute
    filter is applied.
    """
    if not ext_link_type:
        return extract_nodes(soup, "ext-link")
    return extract_nodes(
        soup, "ext-link", attr="ext-link-type", value=ext_link_type)
Пример #6
0
def abstract(soup, abstract_type=None):
    """Return ``abstract`` nodes from ``soup``.

    A truthy ``abstract_type`` restricts the lookup to nodes whose
    ``abstract-type`` attribute has that value; otherwise no attribute
    filter is applied.
    """
    if not abstract_type:
        return extract_nodes(soup, "abstract")
    return extract_nodes(
        soup, "abstract", attr="abstract-type", value=abstract_type)
Пример #7
0
def authors(soup, contrib_type="author"):
    """Return ``contrib`` nodes from ``soup``.

    By default only nodes whose ``contrib-type`` attribute is ``author``
    are requested. Passing a falsy ``contrib_type`` (for example None)
    drops the attribute filter altogether.
    """
    if not contrib_type:
        return extract_nodes(soup, "contrib")
    return extract_nodes(
        soup, "contrib", attr="contrib-type", value=contrib_type)
Пример #8
0
def pub_date(soup, date_type=None, pub_type=None):
    """Return ``pub-date`` nodes, optionally filtered by one attribute.

    ``date_type`` takes precedence: when it is not None the ``date-type``
    attribute is matched and ``pub_type`` is ignored. Otherwise a non-None
    ``pub_type`` is matched against the ``pub-type`` attribute. With both
    left as None no attribute filter is applied.
    """
    if date_type is not None:
        attr_name, attr_value = "date-type", date_type
    elif pub_type is not None:
        attr_name, attr_value = "pub-type", pub_type
    else:
        return extract_nodes(soup, "pub-date")
    return extract_nodes(soup, "pub-date", attr=attr_name, value=attr_value)
Пример #9
0
def journal_issn(soup, pub_format, pub_type):
    """Return the first ``issn`` node selected by the given criteria.

    ``pub_format`` takes precedence: when it is not None the
    ``publication-format`` attribute is matched. Otherwise a non-None
    ``pub_type`` is matched against the ``pub-type`` attribute. When both
    are None the first ``issn`` node of any kind is returned.
    """
    if pub_format is not None:
        candidates = extract_nodes(
            soup, "issn", attr="publication-format", value=pub_format)
    elif pub_type is not None:
        candidates = extract_nodes(
            soup, "issn", attr="pub-type", value=pub_type)
    else:
        # No criteria given: take whichever issn node comes first.
        candidates = extract_nodes(soup, "issn")
    return first(candidates)
Пример #10
0
def journal_id(soup):
    """Return the first non-nil ``journal-id`` node of type ``publisher-id``."""
    publisher_ids = extract_nodes(
        soup, "journal-id", attr="journal-id-type", value="publisher-id")
    return firstnn(publisher_ids)
Пример #11
0
def author_keywords(soup):
    """Collect ``kwd`` nodes from the author keyword groups in ``soup``.

    A ``kwd-group`` is considered when its ``kwd-group-type`` attribute is
    ``author-keywords`` or is missing altogether; a few articles omit the
    attribute, so those groups are included too. Only direct children of
    each group named ``kwd`` are collected.
    """
    collected = []
    for group in extract_nodes(soup, "kwd-group"):
        group_type = group.get("kwd-group-type")
        if group_type is not None and group_type != "author-keywords":
            continue
        collected.extend([child for child in group if child.name == "kwd"])
    return collected
Пример #12
0
def article_contributors(soup):
    """Return contributor nodes that sit directly inside a ``contrib-group``.

    Looks up ``contrib`` and ``on-behalf-of`` nodes within the node returned
    by ``article_meta(soup)`` and keeps those whose parent is named
    ``contrib-group``. Returns None when ``article_meta(soup)`` is falsy.
    """
    meta_node = article_meta(soup)
    if not meta_node:
        return None
    candidates = extract_nodes(meta_node, ["contrib", "on-behalf-of"])
    return [
        node for node in candidates
        if node.parent.name == "contrib-group"
    ]
Пример #13
0
def research_organism_keywords(soup):
    """Return ``kwd`` children of the first research-organism keyword group.

    The group is the first ``kwd-group`` whose ``kwd-group-type`` attribute
    is ``research-organism``. Returns None when no such group is found or
    when the group has no direct children named ``kwd``.
    """
    organism_groups = extract_nodes(
        soup, "kwd-group", attr="kwd-group-type", value="research-organism")
    group = first(organism_groups)
    if not group:
        return None
    keywords = [child for child in group if child.name == "kwd"]
    return keywords if keywords else None
Пример #14
0
def full_subject_area(soup, subject_group_type=None):
    """Return ``subj-group`` nodes located under article-meta categories.

    Only ``subj-group`` nodes whose parent is ``article-categories`` and
    whose grandparent is ``article-meta`` are considered. When
    ``subject_group_type`` is truthy, the result is further restricted to
    nodes whose ``subj-group-type`` attribute equals it.
    """
    subject_group_tags = [
        tag for tag in extract_nodes(soup, "subj-group")
        if tag.parent.name == "article-categories"
        and tag.parent.parent.name == "article-meta"
    ]

    if subject_group_type:
        # The previous code called filter() with only a predicate (a
        # TypeError) and compared inside .get(); compare the attribute value
        # instead. A comprehension also avoids depending on the built-in
        # `list`, which a same-named function in this module may shadow.
        subject_group_tags = [
            tag for tag in subject_group_tags
            if tag.get("subj-group-type") == subject_group_type
        ]

    return subject_group_tags
Пример #15
0
def subject_area(soup, subject_group_type=None):
    """Return ``subject`` nodes from the article-meta categories.

    Supports all subject areas or just particular ones filtered by
    ``subject_group_type``. A ``subject`` node qualifies when its ancestry
    is ``subj-group`` > ``article-categories`` > ``article-meta``. When
    ``subject_group_type`` is truthy, the qualifying nodes are further
    restricted to those whose parent ``subj-group`` has a matching
    ``subj-group-type`` attribute.
    """
    subject_area_tags = [
        tag for tag in extract_nodes(soup, "subject")
        if tag.parent.name == "subj-group"
        and tag.parent.parent.name == "article-categories"
        and tag.parent.parent.parent.name == "article-meta"
    ]
    if subject_group_type:
        # Narrow the already-qualified nodes. Filtering the raw node list
        # here would discard the ancestry check above and could admit
        # `subject` nodes from outside article-meta.
        subject_area_tags = [
            tag for tag in subject_area_tags
            if tag.parent.get("subj-group-type") == subject_group_type
        ]
    return subject_area_tags
Пример #16
0
def fig_group(soup):
    """Return the ``fig-group`` nodes extracted from ``soup``."""
    fig_group_nodes = extract_nodes(soup, "fig-group")
    return fig_group_nodes
Пример #17
0
def string_name(soup):
    """Return the ``string-name`` nodes extracted from ``soup``."""
    string_name_nodes = extract_nodes(soup, "string-name")
    return string_name_nodes
Пример #18
0
def principal_award_recipient(soup):
    """Return the ``principal-award-recipient`` nodes extracted from ``soup``."""
    recipient_nodes = extract_nodes(soup, "principal-award-recipient")
    return recipient_nodes
Пример #19
0
def award_group(soup):
    """Return the ``award-group`` nodes extracted from ``soup``."""
    award_group_nodes = extract_nodes(soup, "award-group")
    return award_group_nodes
Пример #20
0
def funding_group(soup):
    """Return the ``funding-group`` nodes extracted from ``soup``."""
    funding_group_nodes = extract_nodes(soup, "funding-group")
    return funding_group_nodes
Пример #21
0
def journal_title(soup):
    """Return the first ``journal-title`` node extracted from ``soup``."""
    title_nodes = extract_nodes(soup, "journal-title")
    return first(title_nodes)
Пример #22
0
def article_type(soup):
    """Return the ``article-type`` attribute of the first ``article`` node.

    The result is raw data rather than a BeautifulSoup node. Returns None
    when ``first`` yields None for the ``article`` lookup, instead of
    failing with an AttributeError on the missing node.
    """
    article_node = first(extract_nodes(soup, "article"))
    if article_node is None:
        # No article node to read the attribute from.
        return None
    return article_node.get("article-type")
Пример #23
0
def article_meta(soup):
    """Return the first ``article-meta`` node extracted from ``soup``."""
    meta_nodes = extract_nodes(soup, "article-meta")
    return first(meta_nodes)
Пример #24
0
def fig(soup):
    """Return the ``fig`` nodes extracted from ``soup``."""
    fig_nodes = extract_nodes(soup, "fig")
    return fig_nodes
Пример #25
0
def math(soup):
    """Return the ``math`` nodes extracted from ``soup``."""
    math_nodes = extract_nodes(soup, "math")
    return math_nodes
Пример #26
0
def disp_formula(soup):
    """Return the ``disp-formula`` nodes extracted from ``soup``."""
    formula_nodes = extract_nodes(soup, "disp-formula")
    return formula_nodes
Пример #27
0
def publisher(soup):
    """Return the first ``publisher-name`` node extracted from ``soup``."""
    publisher_name_nodes = extract_nodes(soup, "publisher-name")
    return first(publisher_name_nodes)
Пример #28
0
def list(soup):  # NOTE: this name shadows the built-in `list` in this module
    """Return the ``list`` nodes extracted from ``soup``."""
    list_nodes = extract_nodes(soup, "list")
    return list_nodes
Пример #29
0
def boxed_text(soup):
    """Return the ``boxed-text`` nodes extracted from ``soup``."""
    boxed_text_nodes = extract_nodes(soup, "boxed-text")
    return boxed_text_nodes
Пример #30
0
def list_item(soup):
    """Return the ``list-item`` nodes extracted from ``soup``."""
    list_item_nodes = extract_nodes(soup, "list-item")
    return list_item_nodes