def persistent_identifiers(self, key, value):
    """Split identifier fields into DOIs and other persistent identifiers.

    Every truthy field contributes one entry per value of its ``a``
    subfield.  Fields whose ``2`` subfield equals ``doi`` (compared
    case-insensitively) are accumulated under ``self['dois']``; all other
    fields are accumulated into the returned list, which also records the
    ``2`` subfield as ``type``.  Both lists start from whatever the record
    already holds and are deduplicated before being stored/returned.

    ``self['dois']`` is only written when at least one DOI is present.
    """
    fields = utils.force_list(value)
    doi_entries = self.get('dois', [])
    other_entries = self.get('persistent_identifiers', [])

    for field in fields:
        if not field:
            continue

        identifiers = utils.force_list(field.get('a'))
        scheme = field.get('2')
        origin = field.get('9')

        if scheme and scheme.lower() == 'doi':
            doi_entries.extend(
                {'value': identifier, 'source': origin}
                for identifier in identifiers
            )
        else:
            other_entries.extend(
                {'value': identifier, 'source': origin, 'type': scheme}
                for identifier in identifiers
            )

    if doi_entries:
        self['dois'] = \
            inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
                doi_entries)

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        other_entries)
def report_numbers(self, key, value):
    """Report numbers and arXiv numbers from 037.

    A field is treated as an arXiv eprint when its ``9`` subfield is
    exactly ``arXiv`` and it carries a ``c`` subfield; such fields are
    stored (deduplicated) under ``self['arxiv_eprints']``.  Every other
    field becomes a report number, taking its value from subfield ``a``
    and falling back to ``z``.  The deduplicated report numbers are
    returned.
    """
    plain_numbers = self.get('report_numbers', [])
    eprints = self.get('arxiv_eprints', [])

    for field in utils.force_list(value):
        if field.get('9') == 'arXiv' and 'c' in field:
            eprints.append({
                'value': field.get('a'),
                'categories': utils.force_list(field.get('c')),
            })
            continue

        plain_numbers.append({
            'source': field.get('9'),
            'value': field.get('a', field.get('z')),
        })

    # Written unconditionally, even when no eprint was found.
    self['arxiv_eprints'] = \
        inspire_dojson_utils.remove_duplicates_from_list_of_dicts(eprints)
    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        plain_numbers)
def collaboration(self, key, value):
    """Added Entry-Corporate Name.

    Deduplicates the incoming fields, then appends one entry per field to
    the collaborations already stored on the record.  Each entry holds the
    ``g`` subfield as ``value`` and a record reference built from the
    ``0`` subfield via ``inspire_dojson_utils.get_record_ref``.

    Returns the (extended) list of collaboration entries.
    """
    value = utils.force_list(value)

    def get_value(value):
        recid = None
        if '0' in value:
            try:
                recid = int(value.get('0'))
            except (TypeError, ValueError):
                # Best effort: a ``0`` subfield that is not a single
                # integer-like value leaves the entry without a record id.
                # Only conversion failures are swallowed, so interrupts
                # and unrelated errors still propagate.
                recid = None
        return {
            'value': value.get('g'),
            'record': inspire_dojson_utils.get_record_ref(
                recid, 'experiments'),
        }

    collaboration = self.get('collaboration', [])

    filtered_value = inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        value)
    for element in filtered_value:
        collaboration.append(get_value(element))

    return collaboration
def collections(self, key, value):
    """Collection this record belongs to.

    Appends one entry per incoming field to the collections already on the
    record.  ``primary`` comes from subfield ``a`` (its first item when
    ``a`` is a list), ``secondary`` from ``b`` and ``deleted`` from ``c``.

    If any entry holds a list value the result goes through
    ``strip_empty_values``; otherwise it is deduplicated.
    NOTE(review): presumably the deduplication helper cannot handle list
    values, hence the separate branch -- confirm against the helper.
    """
    value = utils.force_list(value)

    def get_value(value):
        primary = value.get('a')
        if isinstance(primary, list):
            # Repeated ``a`` subfield: only the first occurrence is kept.
            primary = primary[0]
        return {
            'primary': primary,
            'secondary': value.get('b'),
            'deleted': value.get('c'),
        }

    collections = self.get('collections', [])

    for val in value:
        collections.append(get_value(val))

    # True as soon as any entry has a list among its values.
    contains_list = any(
        isinstance(item, list)
        for element in collections
        for item in element.values()
    )

    if contains_list:
        return strip_empty_values(collections)
    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collections)
def clean_dois(sender, *args, **kwargs):
    """Reduce the DOI entries of ``sender`` to their ``value`` key.

    When ``sender`` has a ``dois`` key, every entry is replaced by a dict
    holding only its ``value`` item (an empty dict if it has none) and the
    resulting list is deduplicated in place of the old one.  A ``sender``
    without ``dois`` is left untouched.  Extra positional and keyword
    arguments are accepted and ignored.
    """
    if 'dois' not in sender:
        return

    values_only = [
        {name: content for name, content in entry.items() if name == 'value'}
        for entry in sender['dois']
    ]
    sender['dois'] = inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        values_only)
def test_remove_duplicates_from_list_of_dicts_preserving_order():
    """Duplicate dicts are dropped and first-seen order is kept."""
    # The third entry repeats the first one.
    with_duplicates = [
        dict(a=123, b=1234),
        dict(a=3222, b=1234),
        dict(a=123, b=1234),
    ]
    expected = [dict(a=123, b=1234), dict(a=3222, b=1234)]

    result = utils.remove_duplicates_from_list_of_dicts(with_duplicates)

    assert expected == result
def keywords(self, key, value):
    """Collect keywords from the incoming fields.

    Each field contributes an entry with subfield ``a`` as ``value`` and
    subfield ``9`` as ``source``.  Entries are added to the keywords the
    record already has and the deduplicated list is returned.
    """
    fields = utils.force_list(value)
    collected = self.get('keywords', [])

    collected.extend(
        {'value': field.get('a'), 'source': field.get('9')}
        for field in fields
    )

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)
def source(self, key, value):
    """Collect source entries from the incoming fields.

    Each field contributes an entry with subfield ``a`` as ``name`` and
    subfield ``d`` as ``date_verified``.  Entries are added to the sources
    the record already has and the deduplicated list is returned.
    """
    collected = self.get('source', [])

    for field in utils.force_list(value):
        entry = {
            'name': field.get('a'),
            'date_verified': field.get('d'),
        }
        collected.append(entry)

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)
def free_keywords(self, key, value):
    """Free keywords.

    Each field contributes an entry with subfield ``a`` as ``value`` and
    subfield ``9`` as ``source``.  Entries are added to the free keywords
    the record already has and the deduplicated list is returned.
    """
    fields = utils.force_list(value)
    collected = self.get('free_keywords', [])

    for field in fields:
        collected.append(dict(value=field.get('a'), source=field.get('9')))

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)
def subject_terms(self, key, value):
    """Subject Added Entry-Topical Term.

    Each field contributes an entry with subfield ``a`` as ``term``,
    subfield ``2`` as ``scheme`` and subfield ``9`` as ``source``.  Entries
    are added to the subject terms the record already has and the
    deduplicated list is returned.
    """
    fields = utils.force_list(value)
    collected = self.get('subject_terms', [])

    collected.extend(
        {
            'term': field.get('a'),
            'scheme': field.get('2'),
            'source': field.get('9'),
        }
        for field in fields
    )

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)
def external_system_numbers(self, key, value):
    """System Control Number.

    Each field contributes an entry with subfield ``a`` as ``value``,
    subfield ``9`` as ``institute`` and ``obsolete`` set to the truthiness
    of subfield ``z``.  Entries are added to the system numbers the record
    already has and the deduplicated list is returned.
    """
    fields = utils.force_list(value)
    collected = self.get('external_system_numbers', [])

    for field in fields:
        is_obsolete = bool(field.get('z'))
        collected.append({
            'value': field.get('a'),
            'institute': field.get('9'),
            'obsolete': is_obsolete,
        })

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)
def test_remove_duplicates_from_list_of_dicts_preserving_order():
    """Deduplicating a list of dicts keeps the first occurrence, in order."""
    fixture = []
    # First and last pairs are equal, so the last one must be dropped.
    for a_value, b_value in ((123, 1234), (3222, 1234), (123, 1234)):
        fixture.append({'a': a_value, 'b': b_value})

    expected = [{'a': 123, 'b': 1234}, {'a': 3222, 'b': 1234}]
    result = utils.remove_duplicates_from_list_of_dicts(fixture)

    assert expected == result
def thesaurus_terms(self, key, value):
    """Controlled keywords.

    Each field contributes an entry with subfield ``a`` as ``keyword``,
    subfield ``2`` as ``classification_scheme`` and subfield ``e``
    converted to an integer as ``energy_range`` (``None`` when the
    subfield is missing or not convertible).  Entries are added to the
    thesaurus terms the record already has and the deduplicated list is
    returned.
    """
    fields = utils.force_list(value)

    def parse_energy_range(raw):
        # A missing subfield raises TypeError, a non-numeric one ValueError.
        try:
            return int(raw)
        except (TypeError, ValueError):
            return None

    collected = self.get('thesaurus_terms', [])

    for field in fields:
        energy_range = parse_energy_range(field.get('e'))
        collected.append({
            'keyword': field.get('a'),
            'energy_range': energy_range,
            'classification_scheme': field.get('2'),
        })

    return inspire_dojson_utils.remove_duplicates_from_list_of_dicts(
        collected)