Пример #1
0
 def provider_dereference(self, record):
     """
     Record provider information for a record identified by a DOI.

     The canonical DOI is passed to self.dereference() to find the location
     it resolves to. The DOI itself is always stored on the record via
     recordmanager.record_provider_doi(); the resolved location is stored
     via recordmanager.record_provider_url() only when one was found.

     Records that cannot be handled are left untouched. A record is handled
     only if it has an "identifier" entry whose "type" is "doi" and which
     carries a "canonical" form.

     :param record: the bibliographic record (dict-like) to annotate in place
     :return: None in every case
     """
     # All preconditions in one short-circuiting test, checked in order:
     # identifier present -> has a type -> type is doi -> has canonical form
     workable = (
         "identifier" in record
         and "type" in record["identifier"]
         and record["identifier"]["type"] == "doi"
         and "canonical" in record["identifier"]
     )
     if not workable:
         return

     doi = record["identifier"]["canonical"]

     # resolve first, so the lookup happens before the record is modified
     location = self.dereference(doi)

     # the DOI is copied into the provider object whether or not it resolved
     recordmanager.record_provider_doi(record, doi)

     # only record a URL when the lookup actually produced one
     if location is not None:
         recordmanager.record_provider_url(record, location)
Пример #2
0
 def provider_resolver(self, record):
     """
     Record provider information for a record identified by a PubMed id.

     The canonical pmid is first passed to self._resolve_doi(), which yields
     a (doi, location) pair. If a location came back, both the location and
     the DOI are stored on the record and processing stops. Otherwise
     self._scrape_urls() is used as a fallback and any URLs it returns are
     stored via recordmanager.record_provider_urls().

     Records that cannot be handled are left untouched. A record is handled
     only if it has an "identifier" entry whose "type" is "pmid" and which
     carries a "canonical" form.

     :param record: the bibliographic record (dict-like) to annotate in place
     :return: None in every case
     """
     # All preconditions in one short-circuiting test, checked in order:
     # identifier present -> has a type -> type is pmid -> has canonical form
     workable = (
         "identifier" in record
         and "type" in record["identifier"]
         and record["identifier"]["type"] == "pmid"
         and "canonical" in record["identifier"]
     )
     if not workable:
         return

     pmid = record["identifier"]["canonical"]

     # preferred route: find a DOI for the item and where it points
     doi, location = self._resolve_doi(pmid)
     if location is not None:
         recordmanager.record_provider_url(record, location)
         # NOTE(review): the DOI is only recorded when a location was also
         # found; if _resolve_doi can return a DOI with no location, that DOI
         # is dropped here - confirm whether this is intended.
         recordmanager.record_provider_doi(record, doi)
         return

     # fallback route: the DOI lookup gave no location, so try scraping
     # candidate URLs instead (the original comment names the NCBI site)
     scraped = self._scrape_urls(pmid)
     if scraped is None or len(scraped) == 0:
         return

     recordmanager.record_provider_urls(record, scraped)