def test_02_record_provider_urls(self):
    """Verify record_provider_urls stores all supplied URLs under record['provider']['url']."""
    target = {}
    expected = ["http://1", "http://2", "http://3"]

    recordmanager.record_provider_urls(target, expected)

    # the provider section and its url list must now exist on the record
    assert "provider" in target
    assert "url" in target["provider"]

    stored = target["provider"]["url"]
    assert len(stored) == 3

    # sort both sides so the comparison does not depend on ordering
    expected.sort()
    stored.sort()
    assert expected == stored
def provider_resolver(self, record):
    """
    Take a pubmed id (if that is the type) and obtain a reference to the
    base URL of the resource that it links to, recording it on the record.

    The record is only processed when it carries an "identifier" whose
    "type" is "pmid" and which has a "canonical" form; any other record
    (including a missing or empty record/identifier) is left untouched.

    A DOI lookup is attempted first. If it yields a location, that location
    and the DOI are recorded via recordmanager and processing stops. If not,
    the NCBI site is scraped for possible URLs and any that are found are
    recorded via recordmanager.record_provider_urls.

    record -- the record to inspect and, where possible, update

    Returns None in every case.
    """
    # check that we can actually work on this record
    #  - must have an identifier
    #  - must be a pmid
    #  - must have a canonical form
    # A None/empty record or identifier is treated as "cannot work on this"
    # rather than letting the membership tests below raise TypeError.
    if not record or "identifier" not in record:
        return
    identifier = record["identifier"]
    if not identifier or "type" not in identifier:
        return
    if identifier["type"] != "pmid":
        return
    if "canonical" not in identifier:
        return

    # see if we can resolve a doi for the item
    canon = identifier["canonical"]
    doi, loc = self._resolve_doi(canon)

    if loc is not None:
        # if we find something, record it
        recordmanager.record_provider_url(record, loc)
        recordmanager.record_provider_doi(record, doi)
        return

    # if we get to here, the DOI lookup failed, so we need to scrape the
    # NCBI site for possible urls
    urls = self._scrape_urls(canon)
    if urls:
        # if we find something, record it
        recordmanager.record_provider_urls(record, urls)