def get_header(metaheaders, a, b):
	"""Return the first non-empty header value, preferring key a over key b.

	metaheaders is any object exposing get_item(key). Key b is only looked
	up when the value for key a is falsy. Returns None when both are falsy.
	"""
	return metaheaders.get_item(a) or metaheaders.get_item(b) or None
# Load the IEEE Xplore abstract page for this article number.
# NOTE: this rebinds the name `metaheaders` from the module to the instance,
# so the module is no longer reachable under that name after this line.
metaheaders = metaheaders.MetaHeaders("http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=%d" % ar_number)

root = metaheaders.root

abstract = ''

# Text nodes of every child element of the parent of <a name="Abstract">.
abstractDiv = root.xpath("//a[@name='Abstract']/../*/text()")

if abstractDiv:
	# Keep the first text node, minus a leading "Abstract" label.
	abstract = abstractDiv[0]
	abstract = re.sub(r"^Abstract\s*", "", abstract).strip()

doi = metaheaders.get_item("citation_doi")
if not doi:
	# No citation_doi meta header: fall back to scanning the page's links.
	aLinks = root.cssselect("a")

	for a in aLinks:
		# `in` replaces the dict-style has_key(), which Python 3 dropped.
		if "href" not in a.attrib:
			continue
		href = a.attrib["href"]
		if href.startswith("http://dx.doi.org/"):
			match = re.search(r'(10\..*)', href)
			if match:
				doi = match.group(1)
			# Only the first dx.doi.org link is examined, matched or not.
			break


Exemplo n.º 3
0
	<meta name="citation_issn" content="0025-5718">
	<meta name="citation_issn" content="1088-6842">
	<meta name="citation_author" content="LeVeque, Randall J.">
	<meta name="citation_author" content="Oliger, Joseph">
	<meta name="citation_title" content="Numerical methods based on additive splittings for hyperbolic partial differential equations">
	<meta name="citation_online_date" content="">
	<meta name="citation_publication_date" content="1983">
	<meta name="citation_volume" content="40">
	<meta name="citation_issue" content="162">
	<meta name="citation_firstpage" content="469">
	<meta name="citation_lastpage" content="497">
	<meta name="citation_doi" content="10.1090/S0025-5718-1983-0689466-8">
	<meta name="citation_abstract_html_url" content="http://www.ams.org/mcom/1983-40-162/S0025-5718-1983-0689466-8/">
"""

# Look up the DOI from the citation_doi meta header.
doi = metaheaders.get_item("citation_doi")

if not doi:
	bail('Unable to find a DOI')
	# Explicit exit in case bail() returns -- NOTE(review): confirm whether bail() already exits.
	sys.exit(0)

# Emit the start of the tab-separated record: DOI linkout, type and DOI.
print "begin_tsv"
print "linkout\tDOI\t\t%s\t\t" % (doi)
print "type\tJOUR"
print "doi\t" + doi
for f in key_map.keys():
	k = key_map[f]
	v = metaheaders.get_item(k)
	if not v:
		continue
	v = v.strip()
Exemplo n.º 4
0
opener = urllib2.build_opener(*handlers)
opener.addheaders = [("User-Agent", "CiteULike/1.0 +http://www.citeulike.org/")
                     ]
urllib2.install_opener(opener)

try:
    ris_file = urllib2.urlopen(ris_file_url).read()
except:
    bail("Could not fetch RIS file (" + ris_file_url + ")")

metaheaders = metaheaders.MetaHeaders(url)

print "begin_tsv"

if metaheaders.get_item("citation_conference") or metaheaders.get_item(
        "citation_conference_title"):
    print "type\tINCONF"
else:
    print "type\tJOUR"

doi = metaheaders.get_item("citation_doi")
if doi:
    doi = doi.replace("doi:", "")
    print "doi\t%s" % doi
    print "linkout\tDOI\t\t%s\t\t" % (doi)
else:
    bail("Couldn't find an DOI")
print "end_tsv"
print "begin_ris"
print "%s" % (ris_file)
Exemplo n.º 5
0
#
# DOI is in the page
#
# NOTE: rebinds `metaheaders` from the module to the parsed instance.
metaheaders = metaheaders.MetaHeaders(page=page)
dois = metaheaders.get_multi_item("DC.identifier")
doi = None
if dois:
	# Scan every DC.identifier value for a "doi:10.xxx/yyy" pattern.
	# There is no break, so the last matching identifier wins.
	for doi_str in dois:
		doi_match = re.search(r'doi:(10\.[^/]+/[^\s]+)', doi_str,  re.IGNORECASE)
		if doi_match:
			doi = doi_match.group(1)
if not doi:
	bail("Couldn't find a DOI")

# A title is required before any output is produced.
if not metaheaders.get_item("DC.title"):
	bail("Unable to find the article title")

# Emit the tab-separated record: fixed publisher and type, then title,
# one author line per DC.creator value, and the DCTERMS.issued date.
print "begin_tsv"
print "publisher\tDryad Digital Repository"
print "type\tGEN"

metaheaders.print_item("title","DC.title")
authors = metaheaders.get_multi_item("DC.creator")
if authors:
	for a in authors:
		print "author\t%s" % a
metaheaders.print_date("DCTERMS.issued")

abstract = metaheaders.get_item("DC.description");
if abstract:
Exemplo n.º 6
0
if not matched:
	bail("Cannot parse IUCR journal. Unrecognized URL: " + url + " - does the plugin need updating?")

#
# Fetch the page
#
try:
	page = urllib2.urlopen(url).read().strip()
except:
	bail("Couldn't fetch page (" + url + ")")

print "begin_tsv"

metaheaders = metaheaders.MetaHeaders(page=page)
if not doi:
	doiMatch = metaheaders.get_item("citation_doi");
	match = re.search(r'10.1107/([0-9a-zA-Z]+)', doiMatch, re.IGNORECASE)
	if match:
		doi = "10.1107/" + match.group(1)
		key = match.group(1)

if doi:
	print "linkout\tIUCR\t\t%s\t\t" % key
	print "linkout\tDOI\t\t%s\t\t" % doi
	print "url\thttp://dx.doi.org/" + doi
	print "doi\t" + doi
else:
	bail("Couldn't find a DOI")

if not metaheaders.get_item("DC.title"):
	bail("Cannot find a title in that article")
Exemplo n.º 7
0

opener=urllib2.build_opener(*handlers)
opener.addheaders = [("User-Agent", "CiteULike/1.0 +http://www.citeulike.org/")]
urllib2.install_opener(opener)

try:
	ris_file = urllib2.urlopen(ris_file_url).read()
except:
	bail("Could not fetch RIS file (" + ris_file_url + ")")

metaheaders = metaheaders.MetaHeaders(url)

print "begin_tsv"

if metaheaders.get_item("citation_conference") or metaheaders.get_item("citation_conference_title"):
	print "type\tINCONF"
else:
	print "type\tJOUR"

doi = metaheaders.get_item("citation_doi")
if doi:
	doi = doi.replace("doi:","")
	print "doi\t%s" % doi
	print "linkout\tDOI\t\t%s\t\t" % (doi)
else:
	bail("Couldn't find an DOI")
print "end_tsv"
print "begin_ris"
print "%s" % (ris_file)
print "end_ris"
dc.creator = ['D. G. Aggelis', 'N. K. Paschos', 'N. M. Barkoula', 'A. S. Paipetis', 'T. E. Matikas', 'A. D. Georgoulis']
"""


# Maps each output field name to the meta header key it is read from.
key_map = {
	"publisher" : "citation_publisher",
	"abstract" : "description",
	"issue": "citation_issue",
	# NOTE(review): spelled with a dot, unlike the other citation_* keys -- confirm this is intended.
	"issn": "citation.issn",
	"title": "citation_title",
	"volume": "citation_volume",
	"start_page": "citation_firstpage",
	"end_page": "citation_lastpage"
}

doi = metaheaders.get_item("citation_doi")

if not doi:
	bail('Unable to find a DOI')
	# Explicit exit in case bail() returns -- NOTE(review): confirm whether bail() already exits.
	sys.exit(0)

# Remove the "doi:" prefix before emitting.
doi = doi.replace("doi:","")

# Emit the start of the tab-separated record: DOI linkout, type and DOI.
print "begin_tsv"
print "linkout\tDOI\t\t%s\t\t" % (doi)
print "type\tJOUR"
print "doi\t" + doi
for f in key_map.keys():
	k = key_map[f]
	v = metaheaders.get_item(k)
	if not v:
Exemplo n.º 9
0
zid = record_match.group(1)
httpUrl = "http://" + url_host + "/record/" + zid;

#
# Fetch the page
#
try:
	page = urllib2.urlopen(httpUrl).read().strip()
except:
	bail("Couldn't fetch page (" + httpUrl + ")")

#
# DOI is in the page
#
metaheaders = metaheaders.MetaHeaders(page=page)
doi = metaheaders.get_item("citation_doi")

print "begin_tsv"
print "publisher\tZENODO"
print "type\tGEN"

if metaheaders.get_item("citation_title"):
	metaheaders.print_item("title","citation_title")
	authors = metaheaders.get_multi_item("citation_author")
	if authors:
		for a in authors:
			print "author\t%s" % a.encode('utf-8')
	metaheaders.print_date("citation_publication_date")
	zenodoURL = metaheaders.get_item("citation_abstract_html_url")

if zid:
Exemplo n.º 10
0
# Load the IEEE Xplore abstract page for this article number.
# NOTE: this rebinds the name `metaheaders` from the module to the instance,
# so the module is no longer reachable under that name after this line.
metaheaders = metaheaders.MetaHeaders(
    "http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=%d" % ar_number)

root = metaheaders.root

abstract = ''

# Text nodes of every child element of the parent of <a name="Abstract">.
abstractDiv = root.xpath("//a[@name='Abstract']/../*/text()")

if abstractDiv:
    # Keep the first text node, minus a leading "Abstract" label.
    abstract = abstractDiv[0]
    abstract = re.sub(r"^Abstract\s*", "", abstract).strip()

doi = metaheaders.get_item("citation_doi")
if not doi:
    # No citation_doi meta header: fall back to scanning the page's links.
    aLinks = root.cssselect("a")

    for a in aLinks:
        # `in` replaces the dict-style has_key(), which Python 3 dropped.
        if "href" not in a.attrib:
            continue
        href = a.attrib["href"]
        if href.startswith("http://dx.doi.org/"):
            match = re.search(r'(10\..*)', href)
            if match:
                doi = match.group(1)
            # Only the first dx.doi.org link is examined, matched or not.
            break

print "begin_tsv"
Exemplo n.º 11
0
fg_descr = m.group(1)
fg_id = m.group(2)

#
# Fetch the page
#
# `except Exception` (not a bare except) so that KeyboardInterrupt /
# SystemExit are not reported as a fetch failure.
try:
	page = urllib2.urlopen(url).read().strip()
except Exception:
	bail("Couldn't fetch page (" + url + ")")

#
# DOI is in the page
#
# NOTE: rebinds `metaheaders` from the module to the parsed instance.
metaheaders = metaheaders.MetaHeaders(page=page)
doi_str = metaheaders.get_item("citation_doi")
# `or ""` keeps a missing header from raising TypeError in re.search, so the
# script reaches the bail() below instead.
doi_match = re.search(r'doi:(10\.[^/]+/[^\s]+)', doi_str or "",  re.IGNORECASE)

if doi_match:
	doi = doi_match.group(1)
else:
	bail("Couldn't find an DOI")

root = metaheaders.root

# First paragraph text under <div id="article_desc">, or None if absent.
abstractDiv = root.xpath("//div[@id='article_desc']/div/p/text()")

if abstractDiv:
	abstract = abstractDiv[0]
else:
	abstract = None
Exemplo n.º 12
0
	<meta name="citation_issn" content="0025-5718">
	<meta name="citation_issn" content="1088-6842">
	<meta name="citation_author" content="LeVeque, Randall J.">
	<meta name="citation_author" content="Oliger, Joseph">
	<meta name="citation_title" content="Numerical methods based on additive splittings for hyperbolic partial differential equations">
	<meta name="citation_online_date" content="">
	<meta name="citation_publication_date" content="1983">
	<meta name="citation_volume" content="40">
	<meta name="citation_issue" content="162">
	<meta name="citation_firstpage" content="469">
	<meta name="citation_lastpage" content="497">
	<meta name="citation_doi" content="10.1090/S0025-5718-1983-0689466-8">
	<meta name="citation_abstract_html_url" content="http://www.ams.org/mcom/1983-40-162/S0025-5718-1983-0689466-8/">
"""

# Look up the DOI from the citation_doi meta header.
doi = metaheaders.get_item("citation_doi")

if not doi:
    bail('Unable to find a DOI')
    # Explicit exit in case bail() returns -- NOTE(review): confirm whether bail() already exits.
    sys.exit(0)

# Emit the start of the tab-separated record: DOI linkout, type and DOI.
print "begin_tsv"
print "linkout\tDOI\t\t%s\t\t" % (doi)
print "type\tJOUR"
print "doi\t" + doi
for f in key_map.keys():
    k = key_map[f]
    v = metaheaders.get_item(k)
    if not v:
        continue
    v = v.strip()
Exemplo n.º 13
0
from cultools import urlparams, bail

import metaheaders

# Apply a 15-second default timeout to sockets created from here on.
socket.setdefaulttimeout(15)

# Read URL from stdin
url = sys.stdin.readline().strip()

# Wrap stdout so that everything printed is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

# NOTE: rebinds `metaheaders` from the imported module to the parsed instance.
metaheaders = metaheaders.MetaHeaders(url, unescape_entities=True)

print "begin_tsv"

# A citation_conference header marks the record as a conference paper.
if metaheaders.get_item("citation_conference"):
    print "type\tINCONF"
else:
    print "type\tJOUR"

# One author line per citation_author header.
authors = metaheaders.get_multi_item("citation_author")
if authors:
    for a in authors:
        print "author\t%s" % a

# Remaining bibliographic fields, each read from its citation_* header.
metaheaders.print_item("title", "citation_title")
metaheaders.print_date("citation_publication_date")
metaheaders.print_item("volume", "citation_volume")
metaheaders.print_item("start_page", "citation_firstpage")
metaheaders.print_item("end_page", "citation_lastpage")
metaheaders.print_item("issue", "citation_issue")
# Read URL from stdin
url = sys.stdin.readline().strip()

u = urlparse(url)

# rewrite the URL - need ?isAuthorized=no to avoid redirect loop
# Only scheme, host and path are kept; any original query string is dropped.
url = "%s://%s%s?isAuthorized=no" % (u.scheme, u.netloc, u.path)

# Wrap stdout so that everything printed is encoded as UTF-8.
sys.stdout = codecs.getwriter('utf-8')(sys.stdout)

# NOTE: rebinds `metaheaders` from the module to the parsed instance.
metaheaders = metaheaders.MetaHeaders(url)

print "begin_tsv"


# A citation_conference header marks the record as a conference paper.
if metaheaders.get_item("citation_conference"):
	print "type\tINCONF"
else:
	print "type\tJOUR"


# One author line per citation_author header.
authors = metaheaders.get_multi_item("citation_author")
if authors:
	for a in authors:
		print "author\t%s" % a

# Remaining bibliographic fields, each read from its citation_* header.
metaheaders.print_item("title","citation_title")
metaheaders.print_date("citation_date")
metaheaders.print_item("volume","citation_volume")
metaheaders.print_item("start_page","citation_firstpage")
metaheaders.print_item("end_page","citation_lastpage")
Exemplo n.º 15
0
# Fetch the page
#
try:
	page = urllib2.urlopen(url).read().strip()
except:
	bail("Couldn't fetch page (" + url + ")")

print "begin_tsv"
print "publisher\tFrontiers"

#
# DOI is in the page
#
metapropsheaders = metaheaders.MetaHeaders(name="property", page=page)
metaheaders = metaheaders.MetaHeaders(page=page)
doi = metaheaders.get_item("citation_doi")
if doi:
	print "linkout\tDOI\t\t%s\t\t" % doi
	print "linkout\tFRONT\t\t%s\t\t" % doi
else:
	bail("Couldn't find an DOI")

docType = metapropsheaders.get_item("og:type");
if not docType:
	bail("Cannot determine the publication type")

if docType != "article":
	bail("Only supports journal papers ('article', 'JOUR') at this moment, but found " + docType)

if not metaheaders.get_item("citation_title"):
	bail("Cannot find a title in that article")
Exemplo n.º 16
0
# Load the IEEE Xplore abstract page for this article number.
# NOTE: this rebinds the name `metaheaders` from the module to the instance,
# so the module is no longer reachable under that name after this line.
metaheaders = metaheaders.MetaHeaders(
    "http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=%d" % ar_number)

root = metaheaders.root

abstract = ''

# Text nodes of every child element of the parent of <a name="Abstract">.
abstractDiv = root.xpath("//a[@name='Abstract']/../*/text()")

if abstractDiv:
    # Keep the first text node, minus a leading "Abstract" label.
    abstract = abstractDiv[0]
    abstract = re.sub(r"^Abstract\s*", "", abstract).strip()

doi = metaheaders.get_item("citation_doi")
if not doi:
    # No citation_doi meta header: fall back to scanning the page's links.
    aLinks = root.cssselect("a")

    for a in aLinks:
        # `in` replaces the dict-style has_key(), which Python 3 dropped.
        if "href" not in a.attrib:
            continue
        href = a.attrib["href"]
        if href.startswith("http://dx.doi.org/"):
            match = re.search(r'(10\..*)', href)
            if match:
                doi = match.group(1)
            # Only the first dx.doi.org link is examined, matched or not.
            break

print "begin_tsv"
Exemplo n.º 17
0
	"journal":  "citation_journal_title",
	"issue": "citation_issue",
	"title": "DC.Title",
	"volume": "citation_volume",
	"start_page": "citation_firstpage",
	"end_page": "citation_lastpage"
}

"""
   <meta content="2012-01-01" name="DC.Date"/>
        <meta content="eLife Sciences" name="citation_journal_title"/>
        <meta content="" name="citation_issn"/>
        <meta content="2050-084X" name="citation_issn"/>
        """

# Here the DOI is read from the DC.Identifier meta header.
doi = metaheaders.get_item("DC.Identifier")

if not doi:
	bail('Unable to find a DOI')
	# Explicit exit in case bail() returns -- NOTE(review): confirm whether bail() already exits.
	sys.exit(0)

# Emit the start of the tab-separated record: DOI linkout, type and DOI.
print "begin_tsv"
print "linkout\tDOI\t\t%s\t\t" % (doi)
print "type\tJOUR"
print "doi\t" + doi
for f in key_map.keys():
	k = key_map[f]
	v = metaheaders.get_item(k)
	if not v:
		continue
	v = v.strip()
Exemplo n.º 18
0
#
# Fetch the page
#
try:
	page = urllib2.urlopen(url).read().strip()
except:
	bail("Couldn't fetch page (" + url + ")")

print "begin_tsv"
print "publisher\tNature Publishing Group"

#
# DOI is in the page
#
metaheaders = metaheaders.MetaHeaders(page=page)
doi_str = metaheaders.get_item("citation_doi")
doi_match = re.search(r'doi:(10\.[^/]+/[^\s]+)', doi_str,  re.IGNORECASE)

doi = None
if doi_match:
	doi = doi_match.group(1)
else:
	bail("Couldn't find an DOI")

if doi:
	print "linkout\tDOI\t\t%s\t\t" % doi
else:
	bail("Couldn't find an DOI")

print "linkout\tSCIDAT\t\t%s\t\t" % artId
Exemplo n.º 19
0
if not match:
	bail("Cannot parse this BioMed Central paper. Unrecognized URL: " + url + " - does the plugin need updating?")

#
# Fetch the page
#
try:
	page = urllib2.urlopen(url).read().strip()
except:
	bail("Couldn't fetch page (" + url + ")")

print "begin_tsv"

metaheaders = metaheaders.MetaHeaders(page=page)

pmid = metaheaders.get_item("citation_pmid");
if pmid:
	print "linkout\tPMID\t%s\t\t\t" % pmid

doi = metaheaders.get_item("citation_doi");
if doi:
	print "linkout\tDOI\t\t%s\t\t" % doi
	print "url\thttp://dx.doi.org/" + doi
	print "doi\t" + doi
else:
	bail("Couldn't find a DOI")

if not metaheaders.get_item("citation_title"):
	bail("Cannot find a title in that article")

title = metaheaders.get_item("citation_title")
Exemplo n.º 20
0
# Load the IEEE Xplore abstract page for this article number.
# NOTE: this rebinds the name `metaheaders` from the module to the instance,
# so the module is no longer reachable under that name after this line.
metaheaders = metaheaders.MetaHeaders("http://ieeexplore.ieee.org/xpl/freeabs_all.jsp?arnumber=%d" % ar_number)

root = metaheaders.root

abstract = ''

# Text nodes of every child element of the parent of <a name="Abstract">.
abstractDiv = root.xpath("//a[@name='Abstract']/../*/text()")

if abstractDiv:
	# Keep the first text node, minus a leading "Abstract" label.
	abstract = abstractDiv[0]
	abstract = re.sub(r"^Abstract\s*", "", abstract).strip()

doi = metaheaders.get_item("citation_doi")
if not doi:
	# No citation_doi meta header: fall back to scanning the page's links.
	aLinks = root.cssselect("a")

	for a in aLinks:
		# `in` replaces the dict-style has_key(), which Python 3 dropped.
		if "href" not in a.attrib:
			continue
		href = a.attrib["href"]
		if href.startswith("http://dx.doi.org/"):
			match = re.search(r'(10\..*)', href)
			if match:
				doi = match.group(1)
			# Only the first dx.doi.org link is examined, matched or not.
			break


Exemplo n.º 21
0
# Fetch the page
#
try:
	page = urllib2.urlopen(url).read().strip()
except:
	bail("Couldn't fetch page (" + url + ")")

print "begin_tsv"
print "publisher\tPeerJ Inc."

#
# DOI is in the page
#
metapropsheaders = metaheaders.MetaHeaders(name="property", page=page)
metaheaders = metaheaders.MetaHeaders(page=page)
doi = metaheaders.get_item("citation_doi")
if doi:
	print "linkout\tDOI\t\t%s\t\t" % doi
	print "linkout\tPEERJP\t\t%s\t\t" % artId
else:
	bail("Couldn't find an DOI")

docType = metapropsheaders.get_item("og:type");
if not docType:
	bail("Cannot determine the publication type")

if docType != "article":
	bail("Only supports journal papers ('article', 'JOUR') at this moment, but found " + docType)

if not metaheaders.get_item("citation_title"):
	bail("Cannot find a title in that article")
Exemplo n.º 22
0
citation_doi = ['doi:10.1121/1.3571537']
dc.creator = ['D. G. Aggelis', 'N. K. Paschos', 'N. M. Barkoula', 'A. S. Paipetis', 'T. E. Matikas', 'A. D. Georgoulis']
"""

# Maps each output field name to the meta header key it is read from.
key_map = {
    "publisher": "citation_publisher",
    "abstract": "description",
    "issue": "citation_issue",
    # NOTE(review): spelled with a dot, unlike the other citation_* keys -- confirm this is intended.
    "issn": "citation.issn",
    "title": "citation_title",
    "volume": "citation_volume",
    "start_page": "citation_firstpage",
    "end_page": "citation_lastpage"
}

doi = metaheaders.get_item("citation_doi")

if not doi:
    bail('Unable to find a DOI')
    # Explicit exit in case bail() returns -- NOTE(review): confirm whether bail() already exits.
    sys.exit(0)

# Remove the "doi:" prefix before emitting.
doi = doi.replace("doi:", "")

# Emit the start of the tab-separated record: DOI linkout, type and DOI.
print "begin_tsv"
print "linkout\tDOI\t\t%s\t\t" % (doi)
print "type\tJOUR"
print "doi\t" + doi
for f in key_map.keys():
    k = key_map[f]
    v = metaheaders.get_item(k)
    if not v: