# NOTE(review): this span is a whitespace-collapsed fragment of a flat script.
# The `if` that owns the leading `else:` and the body of the trailing
# `if abstract:` both lie outside it, so the code is left exactly as found and
# only described here.
#
# Visible steps, in order:
#   1. On the `else` branch of an earlier test, call bail() with a
#      "no DOI" message.
#   2. Read "og:type" from metapropsheaders; call bail() when it is missing or
#      is anything other than "article".
#   3. Call bail() when metaheaders has no "citation_title".
#   4. Emit TSV fields: type REP, the title, one "author" line per
#      citation_author value, "journal" (taken from
#      citation_technical_report_institution), "issn", and then begin handling
#      the "description" value as the abstract.
#
# NOTE(review): the og:type error text talks about journal papers / 'JOUR'
# while the emitted type is REP, and the first message reads "an DOI" —
# confirm whether either is intentional.
else: bail("Couldn't find an DOI") docType = metapropsheaders.get_item("og:type"); if not docType: bail("Cannot determine the publication type") if docType != "article": bail("Only supports journal papers ('article', 'JOUR') at this moment, but found " + docType) if not metaheaders.get_item("citation_title"): bail("Cannot find a title in that article") print "type\tREP" metaheaders.print_item("title","citation_title") authors = metaheaders.get_multi_item("citation_author") if authors: for a in authors: print "author\t%s" % a journal = metaheaders.get_item("citation_technical_report_institution") if journal: print "journal\t%s" % journal issn = metaheaders.get_item("citation_issn") if issn: print "issn\t%s" % issn abstract = metaheaders.get_item("description") if abstract:
# Scan the collected anchor elements for a dx.doi.org link and keep the DOI
# from the first one that matches.
# NOTE(review): `doi` is only assigned on a match; presumably it is given a
# default before this point — confirm against the preceding code.
for link in aLinks:
    # `in` replaces dict.has_key(), which exists only on Python 2 mappings.
    if "href" not in link.attrib:
        continue
    href = link.attrib["href"]
    if href.startswith("http://dx.doi.org/"):
        # Everything from the "10." prefix to the end of the URL is the DOI.
        match = re.search(r'(10\..*)', href)
        if match:
            doi = match.group(1)
            break

# Single-argument print(...) parses identically under Python 2 and 3.
print("begin_tsv")
print("type\tJOUR")

# The citation_* fields are emitted only when the page carries a title.
# NOTE(review): the source was whitespace-collapsed; every field below is
# assumed to sit inside this `if` — verify against the original layout.
if metaheaders.get_item("citation_title"):
    metaheaders.print_item("title", "citation_title")
    metaheaders.print_item("publisher", "citation_publisher")

    # Prefer one line per citation_author; fall back to the single combined
    # citation_authors header when there are none.
    authors = metaheaders.get_multi_item("citation_author")
    if authors:
        for author in authors:
            print("author\t%s" % author)
    else:
        metaheaders.print_item("author", "citation_authors")

    for field, key in (
        ("volume", "citation_volume"),
        ("issue", "citation_issue"),
        ("start_page", "citation_firstpage"),
        ("end_page", "citation_lastpage"),
        # "serial" or "issn". Do both, to be safe
        ("serial", "citation_issn"),
        ("issn", "citation_issn"),
        ("isbn", "citation_isbn"),
    ):
        metaheaders.print_item(field, key)
# NOTE(review): this span is a whitespace-collapsed fragment that begins inside
# a loop body — the loop header that binds `a` and that the `break` exits is
# outside it — so the code is left exactly as found and only described here.
#
# Visible steps, in order:
#   1. Take the current element's href; when it starts with
#      "http://dx.doi.org/", capture everything from "10." to the end of the
#      URL as `doi` and break out of the loop.
#   2. Emit "begin_tsv" and type JOUR.
#   3. When metaheaders has a "citation_title" (the leading `True and` has no
#      effect on that test), emit title and publisher, then one "author" line
#      per citation_author value, or the single citation_authors value when
#      there are none.
#   4. Emit volume, issue, start/end page, the ISSN under both "serial" and
#      "issn", and the ISBN.
#
# NOTE(review): presumably every field from step 3 onward sits inside the
# title check; the original indentation is lost — verify.
href = a.attrib["href"] if href.startswith("http://dx.doi.org/"): match = re.search(r'(10\..*)', href) if match: doi = match.group(1) break print "begin_tsv" print "type\tJOUR" if True and metaheaders.get_item("citation_title"): metaheaders.print_item("title","citation_title") metaheaders.print_item("publisher","citation_publisher") authors = metaheaders.get_multi_item("citation_author") if authors: for a in authors: print "author\t%s" % a else: metaheaders.print_item("author","citation_authors") metaheaders.print_item("volume","citation_volume") metaheaders.print_item("issue","citation_issue") metaheaders.print_item("start_page","citation_firstpage") metaheaders.print_item("end_page","citation_lastpage") # "serial" or "issn". Do both, to be safe metaheaders.print_item("serial","citation_issn") metaheaders.print_item("issn","citation_issn") metaheaders.print_item("isbn","citation_isbn")
# Pull the DOI out of the candidate identifier strings. There is deliberately
# no break: when several entries match, the last one wins.
# NOTE(review): `doi` is only assigned on a match; presumably it is given a
# default before this point — confirm against the preceding code.
for candidate in dois or ():
    found = re.search(r'doi:(10\.[^/]+/[^\s]+)', candidate, re.IGNORECASE)
    if found:
        doi = found.group(1)

# Both a DOI and a title are required before any record is written.
if not doi:
    bail("Couldn't find a DOI")
if not metaheaders.get_item("DC.title"):
    bail("Unable to find the article title")

# Single-argument print(...) parses identically under Python 2 and 3.
print("begin_tsv")
print("publisher\tDryad Digital Repository")
print("type\tGEN")
metaheaders.print_item("title", "DC.title")

# One "author" line per DC.creator value; nothing when there are none.
authors = metaheaders.get_multi_item("DC.creator")
for creator in authors or ():
    print("author\t%s" % creator)

metaheaders.print_date("DCTERMS.issued")

abstract = metaheaders.get_item("DC.description")
if abstract:
    print("abstract\t%s" % abstract)

# The same DOI feeds both link-out lines.
print("linkout\tDRYAD\t\t%s\t\t" % doi)
print("linkout\tDOI\t\t%s\t\t" % doi)
print("end_tsv")
print("status\tok")
# Build the header reader for `url` and write the citation fields as TSV.
# NOTE(review): this rebinds the name `metaheaders` (whatever exposed
# MetaHeaders — presumably the imported module) to the new instance; the name
# is kept because later code may rely on it.
metaheaders = metaheaders.MetaHeaders(url, unescape_entities=True)

# Single-argument print(...) parses identically under Python 2 and 3.
print("begin_tsv")

# A citation_conference header marks the record as a conference paper.
if metaheaders.get_item("citation_conference"):
    print("type\tINCONF")
else:
    print("type\tJOUR")

# One "author" line per citation_author value; nothing when there are none.
authors = metaheaders.get_multi_item("citation_author")
for author in authors or ():
    print("author\t%s" % author)

metaheaders.print_item("title", "citation_title")
metaheaders.print_date("citation_publication_date")
for field, key in (
    ("volume", "citation_volume"),
    ("start_page", "citation_firstpage"),
    ("end_page", "citation_lastpage"),
    ("issue", "citation_issue"),
):
    metaheaders.print_item(field, key)

# The ISSN was looked up only under "citation.issn", unlike every other
# citation_* key here. Keep that key when the page has it, so existing output
# is unchanged, and otherwise fall back to the underscore form.
serial_key = "citation.issn"
if not metaheaders.get_item(serial_key):
    serial_key = "citation_issn"
metaheaders.print_item("serial", serial_key)

publisher = metaheaders.get_item("citation_publisher")
if publisher:
    print("publisher\t%s" % publisher.strip())

metaheaders.print_item("abstract", "description")
metaheaders.print_item("journal", "citation_journal_title")
metaheaders.print_item("title_secondary", "citation_conference")

doi = metaheaders.get_item("citation_doi")
# Write the citation fields held by `metaheaders` as TSV.
# Single-argument print(...) parses identically under Python 2 and 3.
print("begin_tsv")

# A citation_conference header marks the record as a conference paper.
if metaheaders.get_item("citation_conference"):
    print("type\tINCONF")
else:
    print("type\tJOUR")

# One "author" line per citation_author value; nothing when there are none.
authors = metaheaders.get_multi_item("citation_author")
for author in authors or ():
    print("author\t%s" % author)

metaheaders.print_item("title", "citation_title")
metaheaders.print_date("citation_date")
for field, key in (
    ("volume", "citation_volume"),
    ("start_page", "citation_firstpage"),
    ("end_page", "citation_lastpage"),
    ("issue", "citation_issue"),
):
    metaheaders.print_item(field, key)

# The ISSN was looked up only under "citation.issn", unlike every other
# citation_* key here. Keep that key when the page has it, so existing output
# is unchanged, and otherwise fall back to the underscore form.
serial_key = "citation.issn"
if not metaheaders.get_item(serial_key):
    serial_key = "citation_issn"
metaheaders.print_item("serial", serial_key)

publisher = metaheaders.get_item("citation_publisher")
if publisher:
    # Drop the two boilerplate notices from the publisher value, in this
    # order, before trimming the surrounding whitespace.
    for boilerplate in (
        "COPYRIGHT SPIE--",
        "Downloading of the abstract is permitted for personal use only.",
    ):
        publisher = publisher.replace(boilerplate, "")
    print("publisher\t%s" % publisher.strip())

metaheaders.print_item("abstract", "description")
metaheaders.print_item("journal", "citation_journal_title")
metaheaders.print_item("title_secondary", "citation_conference")