def journal_issn(soup, pub_format, pub_type):
    """Return the first ``issn`` tag, optionally narrowed by attribute.

    ``pub_format`` takes precedence: when it is not None the lookup is
    restricted to tags whose ``publication-format`` attribute equals it.
    Otherwise, when ``pub_type`` is not None, the lookup is restricted to
    tags whose ``pub-type`` attribute equals it.
    """
    if pub_format is not None:
        format_matches = extract_nodes(
            soup, "issn", attr="publication-format", value=pub_format
        )
        return first(format_matches)

    if pub_type is not None:
        type_matches = extract_nodes(
            soup, "issn", attr="pub-type", value=pub_type
        )
        return first(type_matches)

    # neither filter was supplied: return the first issn tag found
    # regardless of which type
    return first(extract_nodes(soup, "issn"))
def custom_meta(soup, meta_name=None):
    """Return the ``custom-meta`` tags found in ``soup``.

    When ``meta_name`` is given, only tags whose first ``meta-name`` node
    has string contents (per ``node_contents_str``) equal to ``meta_name``
    are kept; otherwise every extracted tag is returned.
    """
    all_tags = extract_nodes(soup, "custom-meta")
    if meta_name is None:
        return all_tags

    matching = []
    for candidate in all_tags:
        name_node = first(extract_nodes(candidate, "meta-name"))
        if node_contents_str(name_node) == meta_name:
            matching.append(candidate)
    return matching
def research_organism_keywords(soup):
    """Return the ``kwd`` items of the research-organism keyword group.

    Takes the first ``kwd-group`` whose ``kwd-group-type`` attribute is
    "research-organism" and keeps the items yielded by iterating it
    (presumably its direct children -- confirm) whose ``name`` is "kwd".
    Returns None when the group is falsy or no such item exists.
    """
    kwd_group = first(
        extract_nodes(
            soup, "kwd-group", attr="kwd-group-type", value="research-organism"
        )
    )
    if not kwd_group:
        return None

    keywords = [child for child in kwd_group if child.name == "kwd"]
    # an empty result is reported as None rather than as an empty list
    return keywords if keywords else None
def label(soup):
    """Return the first ``label`` node found in ``soup``."""
    label_nodes = extract_nodes(soup, "label")
    return first(label_nodes)
def caption(soup):
    """Return the first ``caption`` node found in ``soup``."""
    caption_nodes = extract_nodes(soup, "caption")
    return first(caption_nodes)
def article_title(soup):
    """Return the first ``article-title`` node found in ``soup``."""
    title_nodes = extract_nodes(soup, "article-title")
    return first(title_nodes)
def month(soup):
    """Return the first ``month`` node found in ``soup``."""
    month_nodes = extract_nodes(soup, "month")
    return first(month_nodes)
def article_body(soup):
    """Return the first ``body`` node found in ``soup``."""
    body_nodes = extract_nodes(soup, "body")
    return first(body_nodes)
def author_response(soup):
    """Return the first result of ``sub_article(soup, "reply")``.

    NOTE(review): "reply" is presumably the sub-article type used for the
    author response -- confirm against ``sub_article``.
    """
    reply_articles = sub_article(soup, "reply")
    return first(reply_articles)
def copyright_statement(soup):
    """Return the first ``copyright-statement`` node found in ``soup``."""
    statement_nodes = extract_nodes(soup, "copyright-statement")
    return first(statement_nodes)
def publisher_id(soup):
    """Return the publisher-id ``article-id`` tag directly under ``article-meta``.

    Candidates come from ``article_id(soup, pub_id_type="publisher-id")``;
    only those whose parent tag is named "article-meta" are considered,
    and the first of them is returned.
    """
    candidates = article_id(soup, pub_id_type="publisher-id")
    # keep only the article-id tags whose parent is article-meta
    in_article_meta = [
        node for node in candidates if node.parent.name == "article-meta"
    ]
    return first(in_article_meta)
def title(soup):
    """Return the first ``title`` node found in ``soup``."""
    title_nodes = extract_nodes(soup, "title")
    return first(title_nodes)
def licence_url(soup):
    """License url attribute of the license tag.

    Looks up the licence tags once via ``licence(soup)`` and returns the
    ``xlink:href`` attribute of the first one. Returns None when
    ``licence(soup)`` yields a falsy result (no licence tag).
    """
    # evaluate the lookup a single time instead of once for the guard and
    # again for the value
    licence_tags = licence(soup)
    if licence_tags:
        return first(licence_tags).get("xlink:href")
    return None
def article_permissions(soup):
    """Return the first ``permissions`` tag whose parent is ``article-meta``.

    Candidates come from ``permissions(soup)``; tags nested under any other
    parent are ignored.
    """
    # a better selector might be "article-meta.permissions"
    candidates = permissions(soup)
    in_article_meta = [
        node for node in candidates if node.parent.name == "article-meta"
    ]
    return first(in_article_meta)
def acknowledgements(soup):
    """Return the first ``ack`` node found in ``soup``."""
    ack_nodes = extract_nodes(soup, "ack")
    return first(ack_nodes)
def year(soup):
    """Return the first ``year`` node found in ``soup``."""
    year_nodes = extract_nodes(soup, "year")
    return first(year_nodes)
def author_notes(soup):
    """Return the first ``author-notes`` node found in ``soup``."""
    notes_nodes = extract_nodes(soup, "author-notes")
    return first(notes_nodes)
def copyright_year(soup):
    """Return the first ``copyright-year`` node found in ``soup``."""
    year_nodes = extract_nodes(soup, "copyright-year")
    return first(year_nodes)
def doi(soup):
    """Return the doi ``article-id`` tag directly under ``article-meta``.

    Candidates come from ``article_id(soup, pub_id_type="doi")``; only
    those whose parent tag is named "article-meta" are considered, and the
    first of them is returned.
    """
    candidates = article_id(soup, pub_id_type="doi")
    # keep only the article-id tags whose parent is article-meta
    in_article_meta = [
        node for node in candidates if node.parent.name == "article-meta"
    ]
    return first(in_article_meta)
def copyright_holder(soup):
    """Return the first ``copyright-holder`` node found in ``soup``."""
    holder_nodes = extract_nodes(soup, "copyright-holder")
    return first(holder_nodes)
def back(soup):
    """Return the first ``back`` node found in ``soup``."""
    back_nodes = extract_nodes(soup, "back")
    return first(back_nodes)
def funding_statement(soup):
    """Return the first ``funding-statement`` node found in ``soup``."""
    statement_nodes = extract_nodes(soup, "funding-statement")
    return first(statement_nodes)
def decision_letter(soup):
    """Return the decision letter sub-article.

    The "article-commentary" sub-article is tried first; when that lookup
    gives a falsy result, the "decision-letter" sub-article lookup is
    returned instead.
    """
    commentary = first(sub_article(soup, "article-commentary"))
    if commentary:
        return commentary
    # fall back to the alternative sub-article type
    return first(sub_article(soup, "decision-letter"))
def publisher(soup):
    """Return the first ``publisher-name`` node found in ``soup``."""
    name_nodes = extract_nodes(soup, "publisher-name")
    return first(name_nodes)
def journal_title(soup):
    """Return the first ``journal-title`` node found in ``soup``."""
    title_nodes = extract_nodes(soup, "journal-title")
    return first(title_nodes)
def history_date(soup, date_type):
    """Return the first ``date_type`` date tag whose parent is ``history``.

    Candidates come from ``date(soup, date_type)``; tags under any other
    parent are ignored.
    """
    candidates = date(soup, date_type)
    in_history = [node for node in candidates if node.parent.name == "history"]
    return first(in_history)
def article_meta(soup):
    """Return the first ``article-meta`` node found in ``soup``."""
    meta_nodes = extract_nodes(soup, "article-meta")
    return first(meta_nodes)
def day(soup):
    """Return the first ``day`` node found in ``soup``."""
    day_nodes = extract_nodes(soup, "day")
    return first(day_nodes)
def article_type(soup):
    """Return the ``article-type`` attribute of the first ``article`` tag.

    Returns None when no ``article`` tag is found, instead of failing with
    an AttributeError on the missing tag.
    """
    # returns raw data, just that the data doesn't contain any BS nodes
    article_tag = first(extract_nodes(soup, "article"))
    if article_tag is None:
        # assumes ``first`` yields None when nothing matched -- TODO confirm
        return None
    return article_tag.get("article-type")
def test_first(self, value, expected):
    """Check that ``utils.first(value)`` equals ``expected``."""
    actual = utils.first(value)
    self.assertEqual(actual, expected)