def homepage(self):
    """
    Pick a URL for this researcher out of the profile JSON (if any).

    An entry whose name contains "home" or "personal" (case-insensitive)
    is preferred; otherwise the first listed entry is used.  Returns
    None when the profile lists no researcher URLs.
    """
    entries = jpath(
        'orcid-profile/orcid-bio/researcher-urls/researcher-url',
        self.json,
        default=[])

    for entry in entries:
        target = jpath('url/value', entry)
        label = jpath('url-name/value', entry)
        if label is None:
            continue
        lowered = label.lower()
        if 'home' in lowered or 'personal' in lowered:
            return urlize(target)

    # No entry looked like a homepage: fall back on the first one.
    if len(entries) == 0:
        return None
    return urlize(jpath('url/value', entries[0])) or None
def homepage(self):
    """
    Pick a URL for this researcher out of the profile JSON (if any).

    An entry whose name contains "home" or "personal" (case-insensitive)
    is preferred; otherwise the first listed entry is used.  Returns
    None when the profile lists no researcher URLs.
    """
    entries = jpath(
        'person/researcher-urls/researcher-url',
        self.json,
        default=[])

    for entry in entries:
        target = jpath('url/value', entry)
        label = jpath('url-name', entry)
        if label is None:
            continue
        lowered = label.lower()
        if 'home' in lowered or 'personal' in lowered:
            return urlize(target)

    # No entry looked like a homepage: fall back on the first one.
    if len(entries) == 0:
        return None
    return urlize(jpath('url/value', entries[0])) or None
def test_urlize(self):
    # A bare domain is expected to come back with an http:// scheme.
    self.assertEqual(urlize('gnu.org'), 'http://gnu.org')
    # None must be passed through untouched; assertIsNone reports the
    # offending value on failure, unlike assertTrue(... is None).
    self.assertIsNone(urlize(None))
    # An URL that already has a scheme is expected to be left as is.
    self.assertEqual(urlize(u'https://gnu.org'), 'https://gnu.org')
def search_by_name(first, last, instance=settings.ORCID_BASE_DOMAIN,
                   timeout=10):
    """
    Searches for ORCID profiles matching this (first, last) name.

    This is a generator: it yields one dict per compatible profile, with
    the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
    Nothing is yielded when `last` is empty, or when the request or the
    parsing of the response fails (the error is printed).

    :param first: given names to search for
    :param last: family name to search for (nothing is searched
        without it)
    :param instance: ORCID domain to query ("pub." is prepended to it)
    :param timeout: seconds to wait for the server, passed to
        requests.get so that an unresponsive server cannot hang the
        caller forever
    """
    # Validate arguments
    if not last:
        return

    # Perform query
    base_base_pub = "https://pub." + instance + "/"
    baseurl = base_base_pub + 'v1.2/search/orcid-bio/'
    dct = {
        'rows': 10,
        'start': 0,
        'q': 'family-name:%s given-names:%s' % (last, first),
    }
    url = baseurl + '?' + urlencode(dct)

    # Only fetching and parsing can raise the errors handled here, so
    # the try block is limited to them. A timeout is reported as a
    # RequestException like any other network failure.
    try:
        r = requests.get(url, timeout=timeout)
        xml = etree.fromstring(r.text.encode('utf-8'))
    except etree.XMLSyntaxError as e:
        print(e)
        return
    except requests.exceptions.RequestException as e:
        print(e)
        return

    # the namespace is the same for both the production and the
    # sandbox versions.
    ns = {'ns': 'http://www.orcid.org/ns/orcid'}

    for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
        candidateFirst = None
        candidateLast = None

        # Get name
        pers_details = elem.xpath('.//ns:personal-details', namespaces=ns)
        if not pers_details:
            continue
        for item in pers_details[0]:
            if item.tag.endswith('given-names'):
                candidateFirst = item.text
            elif item.tag.endswith('family-name'):
                candidateLast = item.text
        if not candidateFirst or not candidateLast:
            continue

        # Check that the names are compatible
        if shallower_name_similarity(
                (first, last), (candidateFirst, candidateLast)) == 0:
            continue

        # Get ORCID iD
        orcid_elem = elem.xpath(
            './ns:orcid-profile/ns:orcid-identifier/ns:path',
            namespaces=ns)
        if not orcid_elem:
            continue
        orcid = orcid_elem[0].text

        # Add other things: the first listed URL (if any) is taken as
        # the homepage.
        lst = elem.xpath(
            './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
            namespaces=ns)
        homepage = urlize(lst[0]) if lst else None

        keywords = elem.xpath(
            './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
            namespaces=ns)

        yield {
            'first': candidateFirst,
            'last': candidateLast,
            'orcid': orcid,
            'homepage': homepage,
            'keywords': keywords,
        }
def search_by_name(first, last, timeout=10):
    """
    Searches for ORCID profiles matching this (first, last) name.

    This is a generator: it yields one dict per compatible profile, with
    the keys 'first', 'last', 'orcid', 'homepage' and 'keywords'.
    Nothing is yielded when `last` is empty, or when the request or the
    parsing of the response fails (the error is printed).

    :param first: given names to search for
    :param last: family name to search for (nothing is searched
        without it)
    :param timeout: seconds to wait for the server, passed to
        requests.get so that an unresponsive server cannot hang the
        caller forever
    """
    # Validate arguments
    if not last:
        return

    # Perform query (over HTTPS, so that the request is not sent in
    # clear text)
    baseurl = 'https://pub.orcid.org/v1.2/search/orcid-bio/'
    dct = {
        'rows': 10,
        'start': 0,
        'q': 'family-name:%s given-names:%s' % (last, first),
    }
    url = baseurl + '?' + urlencode(dct)

    # Only fetching and parsing can raise the errors handled here, so
    # the try block is limited to them. A timeout is reported as a
    # RequestException like any other network failure.
    try:
        r = requests.get(url, timeout=timeout)
        xml = etree.fromstring(r.text.encode('utf-8'))
    except etree.XMLSyntaxError as e:
        print(e)
        return
    except requests.exceptions.RequestException as e:
        print(e)
        return

    ns = {'ns': 'http://www.orcid.org/ns/orcid'}

    for elem in xml.xpath('//ns:orcid-search-result', namespaces=ns):
        candidateFirst = None
        candidateLast = None

        # Get name
        pers_details = elem.xpath('.//ns:personal-details', namespaces=ns)
        if not pers_details:
            continue
        for item in pers_details[0]:
            if item.tag.endswith('given-names'):
                candidateFirst = item.text
            elif item.tag.endswith('family-name'):
                candidateLast = item.text
        if not candidateFirst or not candidateLast:
            continue

        # Check that the names are compatible
        if shallower_name_similarity(
                (first, last), (candidateFirst, candidateLast)) == 0:
            continue

        # Get ORCID iD
        orcid_elem = elem.xpath(
            './ns:orcid-profile/ns:orcid-identifier/ns:path',
            namespaces=ns)
        if not orcid_elem:
            continue
        orcid = orcid_elem[0].text

        # Add other things: the first listed URL (if any) is taken as
        # the homepage.
        lst = elem.xpath(
            './ns:orcid-profile/ns:orcid-bio/ns:researcher-urls/ns:researcher-url/ns:url/text()',
            namespaces=ns)
        homepage = urlize(lst[0]) if lst else None

        keywords = elem.xpath(
            './ns:orcid-profile/ns:orcid-bio/ns:keywords/ns:keyword/text()',
            namespaces=ns)

        yield {
            'first': candidateFirst,
            'last': candidateLast,
            'orcid': orcid,
            'homepage': homepage,
            'keywords': keywords,
        }