def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` using the record's arXiv categories.

    Works purely by side effect: every arXiv category found in ``obj.data``
    is passed through ``classify_field`` and, when that yields a truthy term,
    an ``{'source': 'arxiv', 'term': term}`` entry is appended to
    ``obj.data['inspire_categories']`` unless an equal entry is already there.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None
    """
    # ``setdefault`` creates the list when missing and hands back the stored one.
    categories = obj.data.setdefault('inspire_categories', [])

    for arxiv_category in get_arxiv_categories(obj.data):
        term = classify_field(arxiv_category)
        if not term:
            # Nothing usable came out of the classification; skip it.
            continue

        candidate = {
            'source': 'arxiv',
            'term': term,
        }
        if candidate not in categories:
            categories.append(candidate)
def arxiv_derive_inspire_categories(obj, eng):
    """Fill ``inspire_categories`` using the record's arXiv categories.

    Works purely by side effect: every arXiv category found in ``obj.data``
    is passed through ``classify_field`` and, when that yields a truthy term,
    an ``{'source': 'arxiv', 'term': term}`` entry is appended to
    ``obj.data['inspire_categories']`` unless an equal entry is already there.

    Args:
        obj (WorkflowObject): a workflow object.
        eng (WorkflowEngine): a workflow engine (not used here).

    Returns:
        None
    """
    # ``setdefault`` creates the list when missing and hands back the stored one.
    categories = obj.data.setdefault('inspire_categories', [])

    for arxiv_category in get_arxiv_categories(obj.data):
        term = classify_field(arxiv_category)
        if not term:
            # Nothing usable came out of the classification; skip it.
            continue

        candidate = {
            'source': 'arxiv',
            'term': term,
        }
        if candidate not in categories:
            categories.append(candidate)
def inspire_categories(self, key, value):
    """Accumulate ``inspire_categories`` entries from one field value.

    Values whose ``2`` subfield is ``'arXiv'`` are skipped entirely. Otherwise
    the ``9`` subfield is lower-cased and used as the source: it is kept when
    listed in the ``source`` enum of the ``elements/inspire_field`` schema,
    translated for two known legacy spellings, and set to ``None`` in every
    other case. Each ``a`` subfield is run through ``classify_field`` and
    appended with that source when the result is truthy.

    Args:
        self: mapping read via ``.get('inspire_categories', [])``; presumably
            the record being built -- confirm against the caller.
        key: not used by this function.
        value: mapping read via ``.get`` for the ``2``, ``9`` and ``a`` keys.

    Returns:
        list: the categories already present on ``self`` (or a new list),
        extended with the entries derived from ``value``.
    """
    schema = load_schema('elements/inspire_field')
    valid_sources = schema['properties']['source']['enum']

    categories = self.get('inspire_categories', [])

    # XXX: we skip arXiv categories here because we're going to add them
    # later in a filter.
    if force_single_element(value.get('2')) == 'arXiv':
        return categories

    source = force_single_element(value.get('9', '')).lower()
    if source not in valid_sources:
        # Legacy spellings mapped to schema values; anything else becomes None.
        legacy_sources = {
            'automatically added based on dcc, ppf, dk': 'curator',
            'submitter': 'user',
        }
        source = legacy_sources.get(source)

    for raw_term in force_list(value.get('a')):
        term = classify_field(raw_term)
        if not term:
            continue
        categories.append({
            'term': term,
            'source': source,
        })

    return categories
def parse(self):
    """Build an Inspire HEP record out of the parsed arXiv record.

    Feeds every extracted piece of metadata to ``self.builder`` in a fixed
    order and finally adds the INSPIRE categories obtained by running each
    arXiv category through ``classify_field`` and de-duplicating the result.

    Returns:
        dict: the same record in the Inspire Literature schema.
    """
    builder = self.builder

    builder.add_abstract(abstract=self.abstract, source=self.source)
    builder.add_title(title=self.title, source=self.source)

    for license_info in self.licenses:
        builder.add_license(**license_info)

    for author in self.authors:
        builder.add_author(author)

    builder.add_number_of_pages(self.number_of_pages)
    builder.add_publication_info(**self.publication_info)

    for collaboration in self.collaborations:
        builder.add_collaboration(collaboration)

    for doi_info in self.dois:
        builder.add_doi(**doi_info)

    builder.add_preprint_date(self.preprint_date)

    # A public note is only recorded when there is one.
    if self.public_note:
        builder.add_public_note(self.public_note, self.source)

    for report_number in self.report_numbers:
        builder.add_report_number(report_number, self.source)

    builder.add_arxiv_eprint(self.arxiv_eprint, self.arxiv_categories)
    builder.add_private_note(self.private_note)
    builder.add_document_type(self.document_type)

    # Translate the arXiv categories; duplicates are dropped by dedupe_list.
    normalized_categories = []
    for arxiv_category in self.arxiv_categories:
        normalized_categories.append(classify_field(arxiv_category))
    builder.add_inspire_categories(
        dedupe_list(normalized_categories),
        'arxiv',
    )

    return builder.record
def test_classify_field_ignores_case():
    """An upper-cased arXiv category is classified as 'Astrophysics'."""
    assert utils.classify_field('ASTRO-PH.CO') == 'Astrophysics'
def test_classify_field_returns_other_if_category_not_found():
    """The input 'quant-bio' is classified as 'Other'."""
    assert utils.classify_field('quant-bio') == 'Other'
def test_classify_field_normalizes_arxiv_category():
    """The dash-separated 'math-dg' maps to 'Math and Math Physics'."""
    assert utils.classify_field('math-dg') == 'Math and Math Physics'
def test_classify_field_returns_category_for_inspire_category():
    """The category name 'Astrophysics' is returned unchanged."""
    assert utils.classify_field('Astrophysics') == 'Astrophysics'
def test_classify_field_returns_category_for_arxiv_category():
    """The arXiv category 'math.AG' maps to 'Math and Math Physics'."""
    assert utils.classify_field('math.AG') == 'Math and Math Physics'
def test_classify_field_returns_none_on_non_string_value():
    """The integer input ``0`` yields ``None``."""
    result = utils.classify_field(0)

    assert result is None
def test_classify_field_returns_none_on_falsy_value():
    """An empty string yields ``None``."""
    result = utils.classify_field('')

    assert result is None