def license_detect(self, record):
    """Try to detect a licence for ``record`` from its provider URLs.

    Licence statements are collected from the ``Publisher`` entries found
    for each of this plugin's base URLs. If none are found, the plugin's
    own ``_license_mappings`` are used instead.

    For every provider URL this plugin supports, the ``/fulltext.html``
    variant of the URL is tried first. The URL is tried as-is only when
    that differs from the fulltext variant and the record still has no
    licence recorded.

    :param record: object exposing ``provider_urls``, ``has_license()`` and
        ``was_licensed()``; it is handed on to ``self.simple_extract``.
    :return: tuple ``(self._short_name, self.__version__)``.
    """
    publishers = []
    for base_url in self._base_urls:
        publishers += Publisher.find_by_journal_url('http://' + self.clean_url(base_url))

    # One single-entry dict per licence: {statement: {'type': ..., 'version': ...}}
    lic_statements = [
        {lic['license_statement']: {'type': lic['license_type'], 'version': lic.get('version', '')}}
        for pub in publishers
        for lic in pub['licenses']
    ]
    if not lic_statements:
        lic_statements = self._license_mappings

    suffix = '/fulltext.html'
    for url in record.provider_urls:
        if not self.supports_base_url(url):
            continue

        # try the fulltext first
        already_fulltext = url.endswith(suffix)
        fulltext_url = url if already_fulltext else url + suffix
        self.simple_extract(lic_statements, record, fulltext_url)

        if already_fulltext:
            # The as-is URL is the one just tried; repeating the identical
            # extraction would only duplicate the work.
            continue

        if not record.has_license() and not record.was_licensed():
            # if not successful, try the URL as-is
            self.simple_extract(lic_statements, record, url)

    return (self._short_name, self.__version__)
def license_detect(self, record):
    """Try to detect a licence for ``record`` from its provider URLs.

    Licence statements are collected from the ``Publisher`` entries found
    for each of this plugin's base URLs. If none are found, the plugin's
    own ``_license_mappings`` are used instead.

    For every provider URL this plugin supports, the page content is
    fetched with ``http_stream_get`` and searched for the statements in
    ``self._author_manuscript_mappings``. The value of the first mapping
    whose statement occurs in the content is passed to
    ``self.simple_extract`` as ``extra_provenance``. If nothing matches,
    ``{"accepted_author_manuscript": False}`` is passed instead.

    :param record: object exposing ``provider_urls``; it is handed on to
        ``self.simple_extract``.
    :return: tuple ``(self._short_name, self.__version__)``.
    """
    publishers = []
    for base_url in self._base_urls:
        publishers += Publisher.find_by_journal_url('http://' + self.clean_url(base_url))

    # One single-entry dict per licence: {statement: {'type': ..., 'version': ...}}
    lic_statements = [
        {lic['license_statement']: {'type': lic['license_type'], 'version': lic.get('version', '')}}
        for pub in publishers
        for lic in pub['licenses']
    ]
    if not lic_statements:
        lic_statements = self._license_mappings

    for url in record.provider_urls:
        if not self.supports_base_url(url):
            continue

        # TODO refactor self.simple_extract into several pieces
        # a downloader, a matcher, and a f() that records the license info
        # so the first two (and perhaps a general version of the third)
        # can be used here instead of this plugin having to do
        # all the work itself.
        _, content, _ = http_stream_get(url)

        extra_provenance = {"accepted_author_manuscript": False}
        for amm in self._author_manuscript_mappings:
            if not amm:
                # An empty mapping has no statement to look for.
                continue
            # Only the first key of each mapping is consulted. It is taken
            # via the iterator because dict views cannot be indexed on
            # Python 3.
            raw_statement = next(iter(amm))
            # Strip only for the comparison; the lookup must use the key
            # exactly as stored, or padded keys raise KeyError.
            if raw_statement.strip() in content:
                extra_provenance = amm[raw_statement]
                break

        self.simple_extract(lic_statements, record, url,
                            content=content, extra_provenance=extra_provenance)

    return self._short_name, self.__version__