Example 1
    def parse_node(self, response, node):
        """Parse an OUP XML article node into a HEP record.

        Extracts DOIs, article type, arxiv eprints, page count, title and
        abstract, authors, journal metadata, copyright/license information,
        and local file paths carried in ``response.meta``.

        :param response: the scrapy response the node came from; its
            ``meta`` may hold ``xml_url``/``pdf_url``/``pdfa_url`` paths.
        :param node: XML selector positioned on an ``<article>`` element.
        :return: ``dict`` with the loaded HEP record fields.
        """
        node.remove_namespaces()
        record = HEPLoader(item=HEPRecord(), selector=node, response=response)

        dois = node.xpath("//article-id[@pub-id-type='doi']/text()").extract()
        record.add_value('dois', dois)

        raw_article_type = node.xpath('./@article-type').extract()
        # Materialize a list (not a lazy ``map``) so the value can be stored
        # in the loader and then inspected again below without exhausting a
        # single-use iterator on Python 3.
        article_type = [self.article_type_mapping.get(x, 'other')
                        for x in raw_article_type]
        record.add_value('journal_doctype', article_type)

        if 'other' in article_type:
            logger.warning(
                'There are unmapped article types for article %s with types %s.'
                % (dois, raw_article_type))

        # FIX: the original tested ``article_type in ['correction', 'addendum']``,
        # comparing the whole *list* to single strings -- always False, so
        # related_article_doi was never added. Check the mapped types instead.
        if set(article_type) & {'correction', 'addendum'}:
            logger.info('Adding related_article_doi.')
            record.add_xpath('related_article_doi',
                             "//related-article[@ext-link-type='doi']/@href")

        arxiv_eprints = self.get_arxiv_eprints(node)
        if not arxiv_eprints:
            logger.warning('No arxiv eprints found for article %s.' % dois)
        else:
            record.add_value('arxiv_eprints', arxiv_eprints)

        page_nr = node.xpath("//counts/page-count/@count")
        if page_nr:
            try:
                # A list comprehension raises ValueError eagerly, so the
                # except clause below actually fires (a lazy ``map`` on
                # Python 3 would defer the error past this ``try``).
                page_nr = [int(count) for count in page_nr.extract()]
                record.add_value('page_nr', page_nr)
            except ValueError as e:
                logger.error(
                    'Failed to parse last_page or first_page for article %s: %s'
                    % (dois, e))

        record.add_xpath('abstract', '//abstract[1]')
        record.add_xpath('title', '//article-title/text()')
        record.add_xpath('subtitle', '//subtitle/text()')

        authors = self._get_authors(node)
        if not authors:
            logger.error('No authors found for article %s.' % dois)
        record.add_value('authors', authors)
        record.add_xpath('collaborations', "//contrib/collab/text()")

        journal_title = '//abbrev-journal-title/text()|//journal-title/text()'
        record.add_xpath('journal_title', journal_title)
        record.add_xpath('journal_issue', '//issue/text()')
        record.add_xpath('journal_volume', '//volume/text()')
        record.add_xpath('journal_artid', '//elocation-id/text()')

        # FIX: ``date_published`` was previously added twice (once right after
        # the author fields and once here); compute and add it a single time.
        published_date = self._get_published_date(node)
        volume = self.get_volume_year(node)
        record.add_value('journal_year', int(volume))
        record.add_value('date_published', published_date)

        record.add_xpath('copyright_holder', '//copyright-holder/text()')
        record.add_xpath('copyright_year', '//copyright-year/text()')
        record.add_xpath('copyright_statement', '//copyright-statement/text()')

        # Renamed from ``license`` to avoid shadowing the builtin.
        license_info = get_license(license_url=node.xpath(
            '//license/license-p/ext-link/text()').extract_first())
        record.add_value('license', license_info)

        record.add_value('collections',
                         ['Progress of Theoretical and Experimental Physics'])

        # Attach local file paths (xml, pdf, pdf/a) when the upstream
        # request placed them in response.meta.
        local_files = []
        if 'xml_url' in response.meta:
            local_files.append({
                'filetype': 'xml',
                'path': response.meta['xml_url']
            })
        if 'pdf_url' in response.meta:
            local_files.append({
                'filetype': 'pdf',
                'path': response.meta['pdf_url']
            })
        if 'pdfa_url' in response.meta:
            local_files.append({
                'filetype': 'pdf/a',
                'path': response.meta['pdfa_url']
            })
        record.add_value('local_files', local_files)

        return dict(record.load_item())
Example 2
    def parse(self, response):
        """Parse an APS JSON response into HEP records.

        Yields one loaded item per article in the response body, then
        follows the ``rel="next"`` pagination link from the ``Link``
        header, if present.

        :param response: scrapy response whose body is APS JSON with a
            top-level ``data`` list of article objects.
        :yields: loaded HEP items, plus a ``Request`` for the next page.
        """
        aps_response = json.loads(response.body_as_unicode())

        for article in aps_response['data']:
            record = HEPLoader(item=HEPRecord(), response=response)

            dois = get_nested(article, 'identifiers', 'doi')
            record.add_value('dois', dois)

            journal_doctype = self.article_type_mapping.get(
                article.get('articleType'), 'other')
            if journal_doctype == 'other':
                logger.warning(
                    'Journal_doctype is %s for article %s. Do we need other mapping for this?'
                    % (journal_doctype, dois))

            record.add_value('journal_doctype', journal_doctype)
            page_nr = article.get('numPages')
            if page_nr is not None:
                record.add_value('page_nr', page_nr)

            # FIX: normalize to '' before stripping the prefix -- calling
            # ``.replace`` on a None result from get_nested would crash
            # before the ``if not arxiv`` guard below could run.
            arxiv = (get_nested(article, 'identifiers', 'arxiv')
                     or '').replace('arXiv:', '')
            if not arxiv:
                logger.warning('No arxiv eprints found for article %s.' % dois)
            else:
                record.add_value('arxiv_eprints', {'value': arxiv})

            record.add_value('abstract',
                             get_nested(article, 'abstract', 'value'))
            record.add_value('title', get_nested(article, 'title', 'value'))

            authors, collaborations = self._get_authors_and_collab(
                article, dois)
            record.add_value('authors', authors)
            record.add_value('collaborations', collaborations)

            record.add_value('journal_title',
                             get_nested(article, 'journal', 'name'))
            record.add_value('journal_issue',
                             get_nested(article, 'issue', 'number'))
            record.add_value('journal_volume',
                             get_nested(article, 'volume', 'number'))

            published_date = article['date']
            record.add_value('journal_year', int(published_date[:4]))
            record.add_value('date_published', published_date)
            record.add_value('field_categories', [{
                'term': term.get('label'),
                'scheme': 'APS',
                'source': '',
            } for term in get_nested(article, 'classificationSchemes',
                                     'subjectAreas')])
            copyright_holders = get_nested(article, 'rights',
                                           'copyrightHolders')
            if copyright_holders:
                record.add_value('copyright_holder',
                                 copyright_holders[0]['name'])

            # NOTE(review): if copyrightYear is absent this stores the
            # string 'None' -- confirm whether that is intended upstream.
            record.add_value(
                'copyright_year',
                str(get_nested(article, 'rights', 'copyrightYear')))
            record.add_value('copyright_statement',
                             get_nested(article, 'rights', 'rightsStatement'))

            # Renamed from ``license`` to avoid shadowing the builtin.
            license_info = get_license(license_url=get_nested(
                article, 'rights', 'licenses')[0]['url'])
            record.add_value('license', license_info)

            record.add_value('collections', ['HEP', 'Citeable', 'Published'])
            yield record.load_item()

        # Pagination support. Will yield until no more "next" pages are found.
        if 'Link' in response.headers:
            links = link_header.parse(response.headers['Link'])
            # Renamed from ``next`` to avoid shadowing the builtin.
            next_links = links.links_by_attr_pairs([('rel', 'next')])
            if next_links:
                next_url = next_links[0].href
                yield Request(next_url)