def related_records(self, key, value):
    """Record a link to a related record in the migration data.

    One entry (``related_recid`` from subfield ``w``, a relation type and a
    relation description) is appended to ``self["_migration"]["related"]``
    and ``has_related`` is set. The relation type defaults to
    ``OTHER_RELATION``. For a ``775__`` field carrying subfield ``b`` the
    description comes from ``b`` and subfield ``x`` ("edition" or
    "language", case-insensitive) selects the matching relation type.

    :raises IgnoreKey: always on success, after the migration data is updated.
    :raises ManualImportRequired: re-raised with ``subfield`` set to
        ``"b or c"`` for ``775__`` and to ``"i"`` for any other key.
    """
    migration_data = self["_migration"]
    related_entries = migration_data["related"]
    relation_type = OTHER_RELATION.name
    relation_description = None

    try:
        if key == "775__" and "b" in value:
            relation_description = clean_val("b", value, str)
            type_tag = clean_val("x", value, str)
            if type_tag:
                lowered_tag = type_tag.lower()
                if lowered_tag == "edition":
                    relation_type = EDITION_RELATION.name
                elif lowered_tag == "language":
                    relation_type = LANGUAGE_RELATION.name

        if key == "787__" and "i" in value:
            # The result is discarded; the call is made only for whatever
            # clean_val does when invoked with manual=True.
            clean_val("i", value, str, manual=True)

        related_entries.append({
            "related_recid": clean_val("w", value, str, req=True),
            "relation_type": relation_type,
            "relation_description": relation_description,
        })
        migration_data.update(
            {"related": related_entries, "has_related": True}
        )
        raise IgnoreKey("_migration")
    except ManualImportRequired as e:
        e.subfield = "b or c" if key == "775__" else "i"
        raise e
def publication_additional(self, key, value):
    """Translate additional publication info and "chapter of" relations.

    For every entry of ``value``:

    * the page range from subfield ``k`` is merged into the publication info;
    * subfield ``b``, when present, is recorded in
      ``self["_migration"]["related"]`` as an ``OTHER_RELATION`` link
      described as "chapter of";
    * subfield ``n`` either marks the material as ``BOOK`` or is stored as a
      ``CERN_CODE`` identifier in ``self["conference_info"]``.

    The resulting info is merged into the existing ``publication_info`` entry
    at the same position, or appended when there is none.

    :returns: the updated ``publication_info`` list.
    """
    _publication_info = self.get("publication_info", [])
    _migration = self["_migration"]
    _related = _migration["related"]
    empty = not bool(_publication_info)

    for i, v in enumerate(force_list(value)):
        temp_info = {}
        pages = clean_pages_range("k", v)
        if pages:
            temp_info.update(pages)

        rel_recid = clean_val("b", v, str)
        if rel_recid:
            _related.append({
                "related_recid": rel_recid,
                "relation_type": OTHER_RELATION.name,
                "relation_description": "chapter of",
            })
            _migration.update({"related": _related, "has_related": True})

        n_subfield = clean_val("n", v, str)
        # Guard against a missing subfield "n": calling .upper() on an empty
        # result used to crash, and an empty conference code is meaningless.
        # (Assumes clean_val yields a falsy value when the subfield is absent.)
        if n_subfield:
            if n_subfield.upper() == "BOOK":
                temp_info.update({"material": "BOOK"})
            else:
                _conference_info = self.get("conference_info", {})
                _identifiers = _conference_info.get("identifiers", [])
                _identifiers.append(
                    {"scheme": "CERN_CODE", "value": n_subfield}
                )
                _conference_info["identifiers"] = _identifiers
                self["conference_info"] = _conference_info

        if not empty and i < len(_publication_info):
            _publication_info[i].update(temp_info)
        else:
            _publication_info.append(temp_info)

    return _publication_info
def related_records(self, key, value):
    """Record a link to a related record in the migration data.

    Appends ``{"related_recid", "relation_type"}`` to
    ``self["_migration"]["related"]`` and sets ``has_related``. The relation
    type is ``"other"`` unless the field is ``775__`` with subfield ``b``, in
    which case the value of ``b`` is used.

    :returns: the updated ``_migration`` dictionary.
    :raises ManualImportRequired: re-raised with ``subfield`` set to
        ``"b or c"`` for ``775__`` and to ``"i"`` for any other key.
    """
    migration_data = self["_migration"]
    related_entries = migration_data["related"]
    relation_type = "other"

    try:
        if key == "775__" and "b" in value:
            relation_type = clean_val("b", value, str)

        if key == "787__" and "i" in value:
            # The result is discarded; the call is made only for whatever
            # clean_val does when invoked with manual=True.
            clean_val("i", value, str, manual=True)

        related_entry = {
            "related_recid": clean_val("w", value, str, req=True),
            "relation_type": relation_type,
        }
        related_entries.append(related_entry)
        migration_data.update(
            {"related": related_entries, "has_related": True}
        )
        return migration_data
    except ManualImportRequired as e:
        e.subfield = "b or c" if key == "775__" else "i"
        raise e
def build_ils_contributor(value):
    """Build the contributor entry for a book from one author field.

    :param value: mapping of subfield codes to values for one author.
    :returns: ``[]`` when subfield ``a`` is missing or empty; otherwise a
        dict with ``full_name``, ``roles``, ``alternative_names`` and, when
        available, ``identifiers`` and ``affiliations``. Keys whose value is
        ``None`` are dropped.
    """
    if not value.get("a"):
        return []

    identifiers = _extract_json_ils_ids(value, "scheme") or None
    full_name = value.get("name") or clean_val("a", value, str)
    role = _get_correct_ils_contributor_role("e", value.get("e", "author"))
    contributor = {
        "identifiers": identifiers,
        "full_name": full_name,
        "roles": [role],
        "alternative_names": [],
    }

    alternative_name = clean_val("q", value, str)
    if alternative_name:
        contributor["alternative_names"] = [alternative_name]

    raw_affiliations = value.get("u")
    if raw_affiliations:
        affiliation_names = list(force_list(raw_affiliations))
        # "et al" placeholders are not affiliations; drop one occurrence of
        # each spelling.
        for placeholder in ("et al.", "et al"):
            if placeholder in affiliation_names:
                affiliation_names.remove(placeholder)
        contributor["affiliations"] = [
            {"name": name} for name in affiliation_names
        ]

    return {
        field: content
        for field, content in iteritems(contributor)
        if content is not None
    }
def publication_additional(self, key, value):
    """Translate additional publication info.

    For every entry of ``value``:

    * the page range from subfield ``k`` is merged into the publication info;
    * subfield ``b``, when present, is stored as the legacy recid of the
      parent journal in ``self["_migration"]`` and the document type is set
      to ``PERIODICAL_ISSUE``;
    * subfield ``n`` either marks the material as ``BOOK`` or is stored as a
      ``CERN_CODE`` identifier in ``self["conference_info"]``.

    The resulting info is merged into the existing ``publication_info`` entry
    at the same position, or appended when there is none.

    :returns: the updated ``publication_info`` list.
    """
    _publication_info = self.get("publication_info", [])
    _migration = self["_migration"]
    empty = not bool(_publication_info)

    for i, v in enumerate(force_list(value)):
        temp_info = {}
        pages = clean_pages_range("k", v)
        if pages:
            temp_info.update(pages)

        rel_recid = clean_val("b", v, str)
        if rel_recid:
            _migration["journal_record_legacy_recid"] = rel_recid
            _migration["has_journal"] = True
            # assume that if we have a parent journal
            # then the doc is a periodical issue
            self["document_type"] = "PERIODICAL_ISSUE"

        n_subfield = clean_val("n", v, str)
        # Guard against a missing subfield "n": calling .upper() on an empty
        # result used to crash, and an empty conference code is meaningless.
        # (Assumes clean_val yields a falsy value when the subfield is absent.)
        if n_subfield:
            if n_subfield.upper() == "BOOK":
                temp_info.update({"material": "BOOK"})
            else:
                _conference_info = self.get("conference_info", {})
                _identifiers = _conference_info.get("identifiers", [])
                _identifiers.append(
                    {"scheme": "CERN_CODE", "value": n_subfield}
                )
                _conference_info["identifiers"] = _identifiers
                self["conference_info"] = _conference_info

        if not empty and i < len(_publication_info):
            _publication_info[i].update(temp_info)
        else:
            _publication_info.append(temp_info)

    return _publication_info
def document_type(self, key, value):
    """Translate the document type field.

    Subfields ``a`` and ``b`` of every entry are mapped through
    ``DOCUMENT_TYPE``. All mapped values, and any document type already set
    on the record, must agree.

    :returns: the resolved document type.
    :raises UnexpectedValue: when an entry maps to nothing and no document
        type is known yet.
    :raises ManualImportRequired: when ``a``, ``b`` or the previously known
        document type disagree.
    """
    _doc_type = self.get("document_type", {})

    def doc_type_mapping(val):
        if val:
            return mapping(DOCUMENT_TYPE, val)

    for v in force_list(value):
        val_a = doc_type_mapping(clean_val("a", v, str))
        val_b = doc_type_mapping(clean_val("b", v, str))

        if not val_a and not val_b and not _doc_type:
            raise UnexpectedValue(subfield="a")

        if val_a and val_b and val_a != val_b:
            raise ManualImportRequired(
                subfield="a or b - inconsistent doc type"
            )

        # Each mapped subfield must match what is known so far. Subfield "b"
        # used to be compared against val_a, which rejected a "b"-only entry
        # even when it matched the existing document type.
        for subfield, mapped in (("a", val_a), ("b", val_b)):
            if not mapped:
                continue
            if _doc_type and _doc_type != mapped:
                raise ManualImportRequired(
                    subfield="{0} - inconsistent doc type".format(subfield)
                )
            _doc_type = mapped

    return _doc_type
def accelerator_experiments(self, key, value):
    """Translate accelerator, experiment and project into extensions.

    Subfields ``a``, ``e`` and ``p`` are added (without duplicates) to the
    ``unit:accelerator``, ``unit:experiment`` and ``unit:project`` lists of
    ``self["extensions"]``.

    :returns: the updated extensions dictionary.
    """
    extensions = self.get("extensions", {})

    # Read every subfield before touching any list.
    found = (
        ("unit:accelerator", clean_val("a", value, str)),
        ("unit:experiment", clean_val("e", value, str)),
        ("unit:project", clean_val("p", value, str)),
    )

    merged = {}
    for extension_key, entry in found:
        entries = extensions.get(extension_key, [])
        if entry and entry not in entries:
            entries.append(entry)
        merged[extension_key] = entries

    extensions.update(merged)
    return extensions
def isbns(self, key, value):
    """Translate ISBN fields into identifiers.

    The ISBN is taken from subfield ``a``, falling back to ``z``. Subfield
    ``u`` may embed a volume (e.g. ``v.1``), which is moved to
    ``self["volume"]``; what remains is stored as ``medium`` when it is one
    of ``MEDIUM_TYPES`` and as ``description`` otherwise.

    :returns: the updated identifiers list (no duplicate entries added).
    :raises ManualImportRequired: when neither ``a`` nor ``z`` holds a
        value, or when a volume is found but one is already set.
    """
    known_identifiers = self.get("identifiers", [])

    for field in force_list(value):
        qualifier = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str) or clean_val("z", field, str)
        isbn = {"value": isbn_value, "scheme": "ISBN"}
        if not isbn_value:
            raise ManualImportRequired(subfield="a or z")

        if qualifier:
            volume_match = re.search(r"(\(*v[.| ]*\d+.*\)*)", qualifier)
            if volume_match:
                volume = volume_match.group(1)
                qualifier = qualifier.replace(volume, "").strip()
                if self.get("volume"):
                    raise ManualImportRequired(subfield="u")
                # TODO volume --> when splitting to series
                self["volume"] = volume

            if qualifier.upper() in MEDIUM_TYPES:
                qualifier_key = "medium"
            else:
                qualifier_key = "description"
            isbn[qualifier_key] = qualifier
            # TODO subfield C

        if isbn not in known_identifiers:
            known_identifiers.append(isbn)

    return known_identifiers
def note(self, key, value):
    """Translate public notes, merging them into a single string.

    :returns: subfield ``a`` (required) on its own, or appended to the note
        already on the record, separated by " / ".
    """
    # merge all found notes
    existing_note = self.get("note", "")
    new_note = clean_val("a", value, str, req=True)
    if not existing_note:
        return new_note
    return "{0} / {1}".format(existing_note, new_note)
def alternative_abstracts(self, key, value):
    """Translate abstract fields.

    The first abstract met becomes the main ``abstract`` of the record; the
    following ones are returned as alternative abstracts.

    :returns: subfield ``a`` (required), or ``None`` when it is already
        among the alternative abstracts.
    :raises IgnoreKey: when the value was stored as the main abstract.
    """
    main_abstract = self.get("abstract", None)
    known_alternatives = self.get("alternative_abstracts", [])

    if not main_abstract:
        # takes first abstract as main
        self["abstract"] = clean_val("a", value, str, req=True)
        raise IgnoreKey("alternative_abstracts")

    candidate = clean_val("a", value, str, req=True)
    if candidate in known_alternatives:
        return None
    return candidate
def table_of_content(self, key, value):
    """Translate the table of content field into a list of chapters.

    Subfields ``a`` and ``t`` are joined with " -- " and split on "; " and
    "--" (with or without surrounding spaces). Chapters are stripped and
    empty ones dropped, so a record holding only one of the two subfields
    no longer yields blank or space-padded entries.

    :returns: list of chapter titles.
    :raises UnexpectedValue: when no chapter text is found.
    """
    text = "{0} -- {1}".format(
        clean_val("a", value, str) or "",
        clean_val("t", value, str) or "",
    ).strip()

    stripped = (chunk.strip() for chunk in re.split(r"; | -- |--", text))
    chapters = [chapter for chapter in stripped if chapter]
    if not chapters:
        raise UnexpectedValue(subfield="a or t")
    return chapters
def identifiers(self, key, value):
    """Translate ISBN identifiers.

    Every entry with a value in subfield ``a`` becomes an ``ISBN``
    identifier whose ``material`` is subfield ``u``.

    :returns: the updated identifiers list (no duplicate entries added).
    """
    known_identifiers = self.get("identifiers", [])

    for field in force_list(value):
        material = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str)
        if not isbn_value:
            continue
        entry = {"value": isbn_value, "scheme": "ISBN", "material": material}
        if entry not in known_identifiers:
            known_identifiers.append(entry)

    return known_identifiers
def title(self, key, value):
    """Translate the title.

    Subfield ``b``, when present, is appended to
    ``self["alternative_titles"]`` as a ``SUBTITLE``.

    :returns: subfield ``a`` (required).
    :raises UnexpectedValue: when the record already has a title.
    """
    if "title" in self:
        raise UnexpectedValue()

    if "b" in value:
        alternative_titles = self.get("alternative_titles", [])
        subtitle = {"value": clean_val("b", value, str), "type": "SUBTITLE"}
        alternative_titles.append(subtitle)
        self["alternative_titles"] = alternative_titles

    return clean_val("a", value, str, req=True)
def book_series(self, key, value):
    """Record the serial a book belongs to in the migration data.

    Appends ``title`` (subfield ``a``), ``volume`` (``v``) and ``issn``
    (``x``) to ``self["_migration"]["serials"]`` and sets ``has_serial``.

    :raises IgnoreKey: always, after the migration data is updated.
    """
    # Subfield "n" is read but its value is not used; the call is kept so
    # that anything clean_val does for this subfield still happens.
    clean_val("n", value, str)
    issn = clean_val("x", value, str)

    migration_data = self["_migration"]
    serials = migration_data["serials"]
    serial_entry = {
        "title": clean_val("a", value, str),
        "volume": clean_val("v", value, str),
        "issn": issn,
    }
    serials.append(serial_entry)
    migration_data["has_serial"] = True
    raise IgnoreKey("book_series")
def tags(self, key, value):
    """Translate tag field - WARNING - also document type and serial field.

    Subfields ``a`` and ``b`` of every entry are mapped through
    ``COLLECTION`` and added to the tags without duplicates. When neither
    maps to a tag, the whole field is handed to ``special_serials``.

    :returns: the updated tags list.
    """
    collected_tags = self.get("tags", [])

    for field in force_list(value):
        mapped = [
            mapping(COLLECTION, clean_val(code, field, str))
            for code in ("a", "b")
        ]
        for tag in mapped:
            if tag and tag not in collected_tags:
                collected_tags.append(tag)
        if not any(mapped):
            special_serials(self, key, value)

    return collected_tags
def identifiers(self, key, value):
    """Translate DOI identifiers.

    Every entry must declare ``doi`` (case-insensitive) in subfield ``2``.
    Its subfield ``a`` becomes a ``DOI`` identifier whose ``material`` is
    subfield ``u``.

    :returns: the updated identifiers list (no duplicate entries added).
    :raises ManualImportRequired: when subfield ``2`` is missing or is not
        ``doi``.
    """
    _identifiers = self.get("identifiers", [])

    for v in force_list(value):
        subfield_u = clean_val("u", v, str)
        sub_a = clean_val("a", v, str)
        sub_2 = clean_val("2", v, str)
        # str has no .lowercase() method; the original call raised
        # AttributeError for every entry. A missing scheme is also rejected.
        if not sub_2 or sub_2.lower() != "doi":
            raise ManualImportRequired("wrong DOI marc")
        doi = {"value": sub_a, "scheme": "DOI", "material": subfield_u}
        if doi not in _identifiers:
            _identifiers.append(doi)

    return _identifiers
def imprint(self, key, value):
    """Translate the imprint field.

    Subfield ``c`` is stored as ``self["publication_year"]`` and returned as
    ``date``. ``place`` and ``publisher`` come from subfields ``a`` and
    ``b``. Each value is passed through ``reverse_replace`` with its
    separator character (".", ":" and "," respectively).

    :returns: dict with ``place``, ``publisher`` and ``date``.
    :raises UnexpectedValue: when a publication year is already set.
    """
    if self.get("publication_year"):
        raise UnexpectedValue(subfield="e", message="doubled publication year")

    publication_date = reverse_replace(clean_val("c", value, str), ".", "")
    self["publication_year"] = publication_date

    place = reverse_replace(clean_val("a", value, str), ":", "")
    publisher = reverse_replace(clean_val("b", value, str), ",", "")
    return {
        "place": place,
        "publisher": publisher,
        "date": publication_date,
    }
def imprint(self, key, value):
    """Translate the imprint field.

    Subfield ``c`` is stored as ``self["publication_year"]`` and returned as
    ``date``; ``place`` comes from subfield ``a``. Subfield ``b`` may hold
    one publisher or several; several are joined with ", ".

    :returns: dict with ``place``, ``publisher`` and ``date``. ``publisher``
        is ``None`` when subfield ``b`` is absent.
    :raises UnexpectedValue: when a publication year is already set.
    """
    if self.get("publication_year"):
        raise UnexpectedValue(subfield="e", message="doubled publication year")

    publication_date = clean_val("c", value, str)
    self["publication_year"] = publication_date

    # Normalise subfield "b" to a list first: iterating it directly failed
    # on a missing subfield and split a single publisher into characters.
    raw_publishers = value.get("b")
    if raw_publishers is None:
        publishers = []
    elif isinstance(raw_publishers, str):
        publishers = [raw_publishers]
    else:
        publishers = list(raw_publishers)

    return {
        "place": clean_val("a", value, str),
        "publisher": ", ".join(publishers) or None,
        "date": publication_date,
    }
def standard_review(self, key, value):
    """Translate the standard review field into extensions.

    Subfields ``i``, ``v``, ``z`` and ``p`` are stored in
    ``self["extensions"]`` as applicability, validity, check date and
    expert of the standard review.

    :returns: the updated extensions dictionary.
    """
    extensions = self.get("extensions", {})

    subfield_by_extension = (
        ("standard_review:applicability", "i"),
        ("standard_review:validity", "v"),
        ("standard_review:checkdate", "z"),
        ("standard_review:expert", "p"),
    )
    # Read every subfield before modifying the extensions.
    review = {
        extension_key: clean_val(code, value, str)
        for extension_key, code in subfield_by_extension
    }
    extensions.update(review)
    return extensions
def barcodes(self, key, value):
    """Match barcodes of items to volumes.

    Appends the volume number extracted from subfield ``n`` together with
    the barcode from subfield ``x`` to ``self["_migration"]["volumes"]``.

    :raises IgnoreKey: always, after the migration data is updated.
    """
    raw_volume = clean_val("n", value, str)
    barcode = clean_val("x", value, str)

    volumes = self["_migration"]["volumes"]
    volume_number = extract_volume_number(
        raw_volume, raise_exception=True, subfield="n"
    )
    volumes.append({"volume": volume_number, "barcode": barcode})
    raise IgnoreKey("barcodes")
def standard_numbers(self, key, value):
    """Translate standard number values into identifiers.

    The number is taken from subfield ``a``, falling back to ``b``. The
    identifier is flagged ``hidden`` whenever subfield ``b`` has a value.

    :returns: the updated identifiers list.
    :raises MissingRequiredField: when neither ``a`` nor ``b`` has a value.
    """
    known_identifiers = self.get("identifiers", [])
    primary = clean_val("a", value, str)
    secondary = clean_val("b", value, str)

    number = primary or secondary
    if not number:
        raise MissingRequiredField(subfield="a or b")

    known_identifiers.append({
        "value": number,
        "scheme": "STANDARD_NUMBER",
        "hidden": bool(secondary),
    })
    return known_identifiers
def copyright(self, key, value):
    """Translate copyright fields.

    :returns: dict with ``material`` (subfield ``3``, lower-cased and mapped
        through ``MATERIALS``), ``holder`` (``d``), ``statement`` (``f``),
        ``year`` (``g``, as int) and ``url`` (``u``).
    """
    raw_material = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, raw_material, raise_exception=True)

    holder = clean_val("d", value, str)
    statement = clean_val("f", value, str)
    year = clean_val("g", value, int)
    url = clean_val("u", value, str)
    return {
        "material": material,
        "holder": holder,
        "statement": statement,
        "year": year,
        "url": url,
    }
def collaborations(self, key, value):
    """Translate collaborations into organisation authors.

    For every entry the name is read from subfield ``g`` or, when ``g`` is
    absent, from subfield ``5``; entries with neither are skipped.

    :returns: the updated authors list.
    """
    authors = self.get("authors", [])

    for field in force_list(value):
        # "g" takes precedence over "5".
        name_code = next((code for code in ("g", "5") if code in field), None)
        if name_code is None:
            continue
        authors.append({
            "full_name": clean_val(name_code, field, str),
            "type": "ORGANISATION"
        })

    return authors
def alternative_identifiers(self, key, value):
    """Translate external system identifier fields.

    * ``0247_``: a ``doi`` scheme (subfield ``2``) is delegated to ``dois``
      and stored in ``self["identifiers"]``; ``asin`` is ignored; anything
      else is unexpected.
    * ``035__``: values or sources containing "CERCER" are ignored;
      ``INSPIRE-CNUM`` is stored in ``self["conference_info"]``; sources in
      ``EXTERNAL_SYSTEM_IDENTIFIERS`` are returned; sources in
      ``EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE`` are ignored.
    * ``036__``: returned when both subfields ``a`` and ``9`` are present.

    :returns: dict with ``value`` and ``scheme`` (possibly empty).
    :raises IgnoreKey: for identifiers that are ignored or stored elsewhere.
    :raises UnexpectedValue: for unknown schemes or sources.
    """
    scheme_hint = clean_val("2", value, str)
    identifier_value = clean_val("a", value, str, req=True)
    identifier_entry = {}

    if key == "0247_":
        normalized_hint = scheme_hint.lower() if scheme_hint else None
        if normalized_hint == "doi":
            # if 0247__2 == doi it is a DOI identifier
            self["identifiers"] = dois(self, key, value)
            raise IgnoreKey("alternative_identifiers")
        if normalized_hint == "asin":
            raise IgnoreKey("alternative_identifiers")
        raise UnexpectedValue(subfield="2")

    if key == "035__":
        if "CERCER" in identifier_value:
            raise IgnoreKey("alternative_identifiers")
        source = clean_val("9", value, str, req=True)
        if "CERCER" in source:
            raise IgnoreKey("alternative_identifiers")

        source_upper = source.upper()
        # conference_info.identifiers mixed data
        if source_upper == "INSPIRE-CNUM":
            conference_info = self.get("conference_info", {})
            conference_ids = conference_info.get("identifiers", [])
            conference_ids.append({
                "scheme": "INSPIRE_CNUM",
                "value": identifier_value
            })
            conference_info.update({"identifiers": conference_ids})
            self["conference_info"] = conference_info
            raise IgnoreKey("alternative_identifiers")
        elif source_upper in EXTERNAL_SYSTEM_IDENTIFIERS:
            identifier_entry.update(
                {"value": identifier_value, "scheme": source}
            )
        elif source_upper in EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE:
            raise IgnoreKey("external_system_identifiers")
        else:
            raise UnexpectedValue(subfield="9")

    if key == "036__":
        if "a" in value and "9" in value:
            identifier_entry.update({
                "value": identifier_value,
                "scheme": clean_val("9", value, str, req=True)
            })

    return identifier_entry
def serial(self, key, value):
    """Translate the serial field.

    :returns: dict with ``title`` (subfield ``a``, required), ``identifiers``
        (a single ``ISSN`` entry from subfield ``x``, or ``None``) and
        ``volume`` (the first run of digits in subfield ``v``, or ``None``).
    """
    issn = clean_val("x", value, str)
    identifiers = [{"scheme": "ISSN", "value": issn}] if issn else None

    raw_volume = clean_val("v", value, str)
    volume_numbers = re.findall(r"\d+", raw_volume) if raw_volume else None

    return {
        "title": clean_val("a", value, str, req=True),
        "identifiers": identifiers,
        "volume": volume_numbers[0] if volume_numbers else None,
    }
def alt_authors(self, key, value):
    """Translate the alternative authors field.

    The n-th entry of ``value`` supplies ``alternative_names`` (subfield
    ``a``) for the n-th author already on the record. Nothing happens when
    the record has no authors.

    :returns: the (possibly updated) authors list.
    """
    authors = self.get("authors", [])
    if not authors:
        return authors

    # NOTE(review): entries are matched to authors purely by position; more
    # entries than authors raises IndexError - confirm whether that is the
    # intended failure mode.
    for position, field in enumerate(force_list(value)):
        author = authors[position]
        author.update({"alternative_names": clean_val("a", field, str)})

    return authors
def languages(self, key, value):
    """Translate the language field to a two-letter language code.

    Subfield ``b`` is looked up (case-insensitively) with ``pycountry``.

    :returns: the ``alpha_2`` code of the language.
    :raises UnexpectedValue: when subfield ``b`` is missing, is not a known
        language, or the language has no two-letter code.
    """
    lang = clean_val("b", value, str)
    # A missing subfield is reported like an unknown language; previously
    # .lower() ran outside the try block and leaked an AttributeError.
    if not lang:
        raise UnexpectedValue(subfield="b")
    try:
        return pycountry.languages.lookup(lang.lower()).alpha_2
    except (KeyError, AttributeError, LookupError):
        # Report the subfield that was actually read ("b").
        raise UnexpectedValue(subfield="b")
def eitem_int_note(self, key, value):
    """Translate the e-item internal note.

    :returns: ``self["_eitem"]`` (or a new dict) with ``internal_notes`` set
        to subfield ``a``.
    """
    eitem = self.get("_eitem", {})
    internal_note = clean_val("a", value, str)
    eitem["internal_notes"] = internal_note
    return eitem
# arxiv_eprints: for key "037__", every entry's subfield "a" (required) is
# added to self["alternative_identifiers"] with scheme "arXiv" unless an
# arXiv identifier with the same value is already there, and every subfield
# "c" category is added to self["subjects"] with scheme "arXiv" unless
# already present. A category that is not in ARXIV_CATEGORIES raises
# UnexpectedValue for that subfield. The function ends with
# raise IgnoreKey("subjects") instead of returning a value.
# NOTE(review): this definition is collapsed onto one line, so the nesting
# level of the final raise (per entry, per key, or unconditional) cannot be
# read from here - confirm before reformatting or restructuring.
def arxiv_eprints(self, key, value): """Translates arxiv_eprints fields. output: { 'alternative_identifiers': [{'scheme': 'arXiv', 'value': `037__a`}], } """ def check_category(field, val): category = clean_val(field, val, str) if category: if category in ARXIV_CATEGORIES: return category raise UnexpectedValue(subfield=field) if key == "037__": _alternative_identifiers = self.get("alternative_identifiers", []) for v in force_list(value): eprint_id = clean_val("a", v, str, req=True) duplicated = [ elem for i, elem in enumerate(_alternative_identifiers) if elem["value"] == eprint_id and elem["scheme"].lower() == "arxiv" ] category = check_category("c", v) if not duplicated: eprint = {"value": eprint_id, "scheme": "arXiv"} _alternative_identifiers.append(eprint) self["alternative_identifiers"] = _alternative_identifiers if category: _subjects = self.get("subjects", []) subject = {"scheme": "arXiv", "value": category} _subjects.append(subject) if subject not in _subjects else None self["subjects"] = _subjects raise IgnoreKey("subjects")
def licenses(self, key, value):
    """Translate license fields.

    :returns: dict with ``license`` (``url`` from subfield ``u`` and ``name``
        from ``a``), ``material`` (subfield ``3``, lower-cased and mapped
        through ``MATERIALS``) and ``internal_note`` (``g``).
    """
    raw_material = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, raw_material, raise_exception=True)

    license_info = {
        "url": clean_val("u", value, str),
        "name": clean_val("a", value, str),
    }
    internal_note = clean_val("g", value, str)
    return {
        "license": license_info,
        "material": material,
        "internal_note": internal_note,
    }