Пример #1
0
def related_records(self, key, value):
    """Collect a related-record link into the migration metadata.

    The link is appended to ``self["_migration"]["related"]`` and
    ``has_related`` is flagged; the field itself is then skipped by raising
    ``IgnoreKey``.

    For ``775__`` fields that carry subfield ``b``, that subfield becomes the
    relation description and subfield ``x`` ("edition" / "language",
    case-insensitive) selects the relation type. Every other case keeps
    ``OTHER_RELATION`` and no description.

    :raises IgnoreKey: always, once the link has been stored.
    :raises ManualImportRequired: re-raised with ``subfield`` set to
        "b or c" for ``775__`` and "i" otherwise.
    """
    migration = self["_migration"]
    related = migration["related"]
    rel_type = OTHER_RELATION.name
    rel_description = None
    try:
        if key == "775__" and "b" in value:
            rel_description = clean_val("b", value, str)
            type_tag = clean_val("x", value, str)
            if type_tag:
                lowered = type_tag.lower()
                if lowered == "edition":
                    rel_type = EDITION_RELATION.name
                elif lowered == "language":
                    rel_type = LANGUAGE_RELATION.name

        if key == "787__" and "i" in value:
            # subfield i is passed through clean_val with manual=True;
            # presumably this flags the record for manual import
            clean_val("i", value, str, manual=True)

        link = {
            "related_recid": clean_val("w", value, str, req=True),
            "relation_type": rel_type,
            "relation_description": rel_description,
        }
        related.append(link)
        migration.update({"related": related, "has_related": True})
        raise IgnoreKey("_migration")
    except ManualImportRequired as err:
        err.subfield = "b or c" if key == "775__" else "i"
        raise err
Пример #2
0
def publication_additional(self, key, value):
    """Translate additional publication info and related-record links.

    For each field value:

    * whatever ``clean_pages_range`` extracts from subfield ``k`` is merged
      into the ``publication_info`` entry at the same position, or appended
      as a new entry when no such entry exists yet;
    * subfield ``b``, when present, is recorded in
      ``self["_migration"]["related"]`` as an ``OTHER_RELATION`` link
      described as "chapter of";
    * subfield ``n``, when present, either marks the entry's material as
      ``BOOK`` or is stored as a ``CERN_CODE`` identifier in
      ``self["conference_info"]``.

    :returns: the updated ``publication_info`` list.
    """
    _publication_info = self.get("publication_info", [])
    _migration = self["_migration"]
    _related = _migration["related"]
    empty = not bool(_publication_info)
    for i, v in enumerate(force_list(value)):
        temp_info = {}
        pages = clean_pages_range("k", v)
        if pages:
            temp_info.update(pages)
        rel_recid = clean_val("b", v, str)
        if rel_recid:
            _related.append({
                "related_recid": rel_recid,
                "relation_type": OTHER_RELATION.name,
                "relation_description": "chapter of"
            })
            _migration.update({"related": _related, "has_related": True})
        n_subfield = clean_val("n", v, str)
        # Subfield n is optional: without this guard a missing value would
        # crash on ``.upper()`` instead of simply being skipped.
        if n_subfield:
            if n_subfield.upper() == "BOOK":
                temp_info.update({"material": "BOOK"})
            else:
                _conference_info = self.get("conference_info", {})
                _identifiers = _conference_info.get("identifiers", [])
                conf_id = {"scheme": "CERN_CODE", "value": n_subfield}
                _identifiers.append(conf_id)
                _conference_info["identifiers"] = _identifiers
                self["conference_info"] = _conference_info
        if not empty and i < len(_publication_info):
            _publication_info[i].update(temp_info)
        else:
            _publication_info.append(temp_info)

    return _publication_info
Пример #3
0
def related_records(self, key, value):
    """Register a related record in the migration metadata.

    Appends a ``related_recid`` / ``relation_type`` pair to
    ``self["_migration"]["related"]``, flags ``has_related`` and returns the
    updated ``_migration`` dict. The relation type defaults to ``"other"``
    and is read from subfield ``b`` for ``775__`` fields that carry it.

    :raises ManualImportRequired: re-raised with ``subfield`` set to
        "b or c" for ``775__`` and "i" otherwise.
    """
    migration = self["_migration"]
    related = migration["related"]
    rel_type = "other"
    try:
        if key == "775__" and "b" in value:
            rel_type = clean_val("b", value, str)
        if key == "787__" and "i" in value:
            # subfield i is passed through clean_val with manual=True;
            # presumably this flags the record for manual import
            clean_val("i", value, str, manual=True)
        recid = clean_val("w", value, str, req=True)
        related.append({
            "related_recid": recid,
            "relation_type": rel_type,
        })
        migration.update({"related": related, "has_related": True})
        return migration
    except ManualImportRequired as err:
        err.subfield = "b or c" if key == "775__" else "i"
        raise err
Пример #4
0
def build_ils_contributor(value):
    """Build a contributor entry for a book from an author field.

    Returns an empty list when subfield ``a`` is missing or empty. Otherwise
    returns a dict with ``full_name``, ``roles`` and ``alternative_names``,
    plus ``identifiers`` and ``affiliations`` when available. Keys whose
    value is ``None`` are dropped from the result.
    """
    if not value.get("a"):
        return []

    identifiers = _extract_json_ils_ids(value, "scheme") or None
    full_name = value.get("name") or clean_val("a", value, str)
    role = _get_correct_ils_contributor_role("e", value.get("e", "author"))
    entry = {
        "identifiers": identifiers,
        "full_name": full_name,
        "roles": [role],
        "alternative_names": [],
    }

    alternative_name = clean_val('q', value, str)
    if alternative_name:
        entry["alternative_names"] = [alternative_name]

    raw_affiliations = value.get("u")
    if raw_affiliations:
        affiliations = list(force_list(raw_affiliations))
        # drop the "et al" placeholders (first occurrence of each spelling)
        for placeholder in ("et al.", "et al"):
            if placeholder in affiliations:
                affiliations.remove(placeholder)
        entry["affiliations"] = [{"name": name} for name in affiliations]

    return {k: v for k, v in iteritems(entry) if v is not None}
Пример #5
0
def publication_additional(self, key, value):
    """Translate additional publication info.

    For each field value:

    * whatever ``clean_pages_range`` extracts from subfield ``k`` is merged
      into the ``publication_info`` entry at the same position, or appended
      as a new entry when no such entry exists yet;
    * subfield ``b``, when present, is stored as the legacy recid of the
      parent journal in ``self["_migration"]`` and the document type is set
      to ``PERIODICAL_ISSUE``;
    * subfield ``n``, when present, either marks the entry's material as
      ``BOOK`` or is stored as a ``CERN_CODE`` identifier in
      ``self["conference_info"]``.

    :returns: the updated ``publication_info`` list.
    """
    _publication_info = self.get("publication_info", [])
    _migration = self["_migration"]
    empty = not bool(_publication_info)
    for i, v in enumerate(force_list(value)):
        temp_info = {}
        pages = clean_pages_range("k", v)
        if pages:
            temp_info.update(pages)
        rel_recid = clean_val("b", v, str)
        if rel_recid:
            _migration["journal_record_legacy_recid"] = rel_recid
            _migration["has_journal"] = True
            # assume that if we have a parent journal
            # then the doc is a periodical issue
            self["document_type"] = "PERIODICAL_ISSUE"
        n_subfield = clean_val("n", v, str)
        # Subfield n is optional: without this guard a missing value would
        # crash on ``.upper()`` instead of simply being skipped.
        if n_subfield:
            if n_subfield.upper() == "BOOK":
                temp_info.update({"material": "BOOK"})
            else:
                _conference_info = self.get("conference_info", {})
                _identifiers = _conference_info.get("identifiers", [])
                conf_id = {"scheme": "CERN_CODE", "value": n_subfield}
                _identifiers.append(conf_id)
                _conference_info["identifiers"] = _identifiers
                self["conference_info"] = _conference_info
        if not empty and i < len(_publication_info):
            _publication_info[i].update(temp_info)
        else:
            _publication_info.append(temp_info)

    return _publication_info
Пример #6
0
def document_type(self, key, value):
    """Translate the document type field.

    Subfields ``a`` and ``b`` of every field value are mapped through
    ``DOCUMENT_TYPE``. All mapped values, together with any document type
    already stored on the record, have to agree.

    :returns: the resulting document type.
    :raises UnexpectedValue: when neither subfield maps to a type and no
        document type is known yet.
    :raises ManualImportRequired: when the mapped values contradict each
        other or the document type already set.
    """
    _doc_type = self.get("document_type", {})

    def doc_type_mapping(val):
        """Map a raw subfield value, passing empty values through as None."""
        if val:
            return mapping(DOCUMENT_TYPE, val)
        return None

    for v in force_list(value):
        val_a = doc_type_mapping(clean_val("a", v, str))
        val_b = doc_type_mapping(clean_val("b", v, str))

        if not val_a and not val_b and not _doc_type:
            raise UnexpectedValue(subfield="a")

        # explicit form of the chained comparison ``a != b != doc_type``
        if val_a and val_b and val_a != val_b and val_b != _doc_type:
            raise ManualImportRequired(subfield="a or b - "
                                       "inconsistent doc type")
        if val_a:
            if _doc_type and _doc_type != val_a:
                raise ManualImportRequired(subfield="a - "
                                           "inconsistent doc type")
            _doc_type = val_a
        if val_b:
            # compare against val_b: checking val_a here rejected records
            # whose only value (b) matched the known document type
            if _doc_type and _doc_type != val_b:
                raise ManualImportRequired(subfield="b - "
                                           "inconsistent doc type")
            _doc_type = val_b
    return _doc_type
Пример #7
0
def accelerator_experiments(self, key, value):
    """Merge accelerator, experiment and project names into the extensions.

    Subfields ``a``, ``e`` and ``p`` are added to the
    ``unit:accelerator``, ``unit:experiment`` and ``unit:project`` lists of
    ``self["extensions"]`` respectively, skipping empty values and values
    already listed.

    :returns: the updated extensions dict.
    """
    extensions = self.get("extensions", {})

    candidates = (
        ("unit:accelerator", clean_val("a", value, str)),
        ("unit:experiment", clean_val("e", value, str)),
        ("unit:project", clean_val("p", value, str)),
    )

    merged = {}
    for ext_key, candidate in candidates:
        names = extensions.get(ext_key, [])
        if candidate and candidate not in names:
            names.append(candidate)
        merged[ext_key] = names

    extensions.update(merged)
    return extensions
Пример #8
0
def isbns(self, key, value):
    """Translate ISBN fields into identifier entries.

    The ISBN is read from subfield ``a``, falling back to ``z``. Subfield
    ``u`` may carry a volume marker, which is moved to ``self["volume"]``;
    the rest of ``u`` is stored as the ``medium`` when it is one of
    ``MEDIUM_TYPES`` and as the ``description`` otherwise.

    :returns: the updated identifiers list (duplicates are not re-added).
    :raises ManualImportRequired: when no ISBN value is present, or when a
        volume is found while ``self["volume"]`` is already set.
    """
    collected = self.get("identifiers", [])
    for field in force_list(value):
        extra = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str) or clean_val("z", field, str)
        entry = {"value": isbn_value, "scheme": "ISBN"}
        if not isbn_value:
            raise ManualImportRequired(subfield="a or z")
        if extra:
            volume_match = re.search(r"(\(*v[.| ]*\d+.*\)*)", extra)

            if volume_match:
                volume = volume_match.group(1)
                extra = extra.replace(volume, "").strip()
                if self.get("volume"):
                    raise ManualImportRequired(subfield="u")
                # TODO volume --> when splitting to series
                self["volume"] = volume
            if extra.upper() in MEDIUM_TYPES:
                entry["medium"] = extra
            else:
                entry["description"] = extra
        # TODO subfield C
        if entry not in collected:
            collected.append(entry)
    return collected
Пример #9
0
def note(self, key, value):
    """Translate public notes, merging them into a single string.

    The required subfield ``a`` is appended to any note already stored on
    the record, separated by " / ".
    """
    existing = self.get("note", "")
    addition = clean_val("a", value, str, req=True)
    if not existing:
        return addition
    return "{0} / {1}".format(existing, addition)
Пример #10
0
def alternative_abstracts(self, key, value):
    """Translate abstract fields.

    The first abstract encountered becomes ``self["abstract"]`` and the
    field is skipped via ``IgnoreKey``. Later abstracts are returned as
    alternative abstracts, or ``None`` when already listed.
    """
    main_abstract = self.get("abstract", None)
    known_alternatives = self.get("alternative_abstracts", [])
    if main_abstract:
        candidate = clean_val("a", value, str, req=True)
        if candidate in known_alternatives:
            return None
        return candidate
    # no main abstract yet: promote this one
    self["abstract"] = clean_val("a", value, str, req=True)
    raise IgnoreKey("alternative_abstracts")
Пример #11
0
def table_of_content(self, key, value):
    """Translate the table of content field into a list of chapters.

    Subfields ``a`` and ``t`` are joined with " -- " and the result is split
    on "; ", " -- " and "--".

    :raises UnexpectedValue: when both subfields are empty.
    """
    part_a = clean_val("a", value, str) or ""
    part_t = clean_val("t", value, str) or ""
    text = "{0} -- {1}".format(part_a, part_t).strip()
    if text == "--":
        # only the separator is left, i.e. neither subfield had content
        raise UnexpectedValue(subfield="a or t")
    return re.split(r"; | -- |--", text)
Пример #12
0
def identifiers(self, key, value):
    """Translate ISBN identifiers.

    Every field value with a non-empty subfield ``a`` yields an ``ISBN``
    entry whose ``material`` is subfield ``u``; entries already present in
    ``self["identifiers"]`` are not added again.
    """
    collected = self.get("identifiers", [])
    for field in force_list(value):
        material = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str)
        if not isbn_value:
            continue
        entry = {"value": isbn_value, "scheme": "ISBN", "material": material}
        if entry not in collected:
            collected.append(entry)
    return collected
Пример #13
0
def title(self, key, value):
    """Translate the title field.

    Subfield ``b``, when present, is appended to
    ``self["alternative_titles"]`` as a ``SUBTITLE``. The required subfield
    ``a`` is returned as the title.

    :raises UnexpectedValue: when the record already has a title.
    """
    if "title" in self:
        raise UnexpectedValue()

    if "b" in value:
        alternatives = self.get("alternative_titles", [])
        subtitle = {"value": clean_val("b", value, str), "type": "SUBTITLE"}
        alternatives.append(subtitle)
        self["alternative_titles"] = alternatives
    return clean_val("a", value, str, req=True)
Пример #14
0
def book_series(self, key, value):
    """Record the serial a book belongs to in the migration metadata.

    Appends the serial's title (subfield ``a``), volume (``v``) and ISSN
    (``x``) to ``self["_migration"]["serials"]`` and flags ``has_serial``.

    :raises IgnoreKey: always, so the field itself is not emitted.
    """
    _migration = self["_migration"]
    _migration["serials"].append({
        "title": clean_val("a", value, str),
        "volume": clean_val("v", value, str),
        "issn": clean_val("x", value, str),
    })
    _migration["has_serial"] = True
    raise IgnoreKey("book_series")
Пример #15
0
def tags(self, key, value):
    """Translate the tag field - WARNING - also document type and serial field.

    Subfields ``a`` and ``b`` of each field value are mapped through
    ``COLLECTION`` and added to ``self["tags"]`` when not yet present. When
    neither maps to a tag, the field is handed to ``special_serials``.
    """
    collected = self.get("tags", [])
    for field in force_list(value):
        tag_a = mapping(COLLECTION, clean_val("a", field, str))
        tag_b = mapping(COLLECTION, clean_val("b", field, str))
        if tag_a and tag_a not in collected:
            collected.append(tag_a)
        if tag_b and tag_b not in collected:
            collected.append(tag_b)
        if not (tag_a or tag_b):
            special_serials(self, key, value)
    return collected
Пример #16
0
def identifiers(self, key, value):
    """Translate DOI identifiers.

    Every field value must declare the scheme "doi" (case-insensitive) in
    subfield ``2``. Subfield ``a`` becomes the identifier value and subfield
    ``u`` its ``material``. Entries already present in
    ``self["identifiers"]`` are not added again.

    :returns: the updated identifiers list.
    :raises ManualImportRequired: when subfield ``2`` is missing or is not
        "doi".
    """
    _identifiers = self.get("identifiers", [])
    for v in force_list(value):
        subfield_u = clean_val("u", v, str)
        sub_a = clean_val("a", v, str)
        sub_2 = clean_val("2", v, str)
        # ``str`` has no ``lowercase`` method; ``lower`` is the intended call.
        # A missing scheme is treated like a wrong one.
        if not sub_2 or sub_2.lower() != "doi":
            raise ManualImportRequired("wrong DOI marc")
        doi = {"value": sub_a, "scheme": "DOI", "material": subfield_u}
        if doi not in _identifiers:
            _identifiers.append(doi)
    return _identifiers
Пример #17
0
def imprint(self, key, value):
    """Translate the imprint field.

    Subfield ``c`` (passed through ``reverse_replace`` for ".") is stored as
    ``self["publication_year"]`` and reused as the imprint date. Place and
    publisher come from subfields ``a`` and ``b``, passed through
    ``reverse_replace`` for ":" and "," respectively.

    :raises UnexpectedValue: when a publication year was already set.
    """
    if self.get("publication_year"):
        raise UnexpectedValue(subfield="e", message="doubled publication year")

    year = reverse_replace(clean_val("c", value, str), ".", "")
    self["publication_year"] = year

    place = reverse_replace(clean_val("a", value, str), ":", "")
    publisher = reverse_replace(clean_val("b", value, str), ",", "")
    return {"place": place, "publisher": publisher, "date": year}
Пример #18
0
def imprint(self, key, value):
    """Translate the imprint field.

    Subfield ``c`` is stored as ``self["publication_year"]`` and reused as
    the imprint date. Subfield ``a`` is the place. All ``b`` subfields are
    joined with ", " to form the publisher.

    :returns: dict with ``place``, ``publisher`` and ``date``.
    :raises UnexpectedValue: when a publication year was already set.
    """
    _publication_year = self.get("publication_year")
    if _publication_year:
        raise UnexpectedValue(subfield="e", message="doubled publication year")
    pub_year = clean_val("c", value, str)
    self["publication_year"] = pub_year

    # Subfield b may be absent, a single value or a sequence of values.
    # Joining a bare string would interleave ", " between its characters,
    # and joining None would raise TypeError.
    publishers = value.get("b") or ()
    if not isinstance(publishers, (list, tuple)):
        publishers = (publishers,)
    return {
        "place": clean_val("a", value, str),
        "publisher": ", ".join(publishers),
        "date": pub_year,
    }
Пример #19
0
def standard_review(self, key, value):
    """Translate the standard review field into extensions.

    Subfields ``i``, ``v``, ``z`` and ``p`` are stored under the
    ``standard_review:*`` keys of ``self["extensions"]``.

    :returns: the updated extensions dict.
    """
    extensions = self.get("extensions", {})
    subfield_by_key = (
        ("standard_review:applicability", "i"),
        ("standard_review:validity", "v"),
        ("standard_review:checkdate", "z"),
        ("standard_review:expert", "p"),
    )
    # read every subfield before touching the extensions dict
    review = {
        ext_key: clean_val(code, value, str)
        for ext_key, code in subfield_by_key
    }
    extensions.update(review)
    return extensions
Пример #20
0
def barcodes(self, key, value):
    """Match barcodes of items to volumes.

    The volume number extracted from subfield ``n`` and the barcode from
    subfield ``x`` are appended to ``self["_migration"]["volumes"]``.

    :raises IgnoreKey: always, so the field itself is not emitted.
    """
    raw_volume = clean_val("n", value, str)
    barcode = clean_val("x", value, str)

    volumes = self["_migration"]["volumes"]
    volume_number = extract_volume_number(
        raw_volume, raise_exception=True, subfield="n")
    volumes.append({"volume": volume_number, "barcode": barcode})
    raise IgnoreKey("barcodes")
Пример #21
0
def standard_numbers(self, key, value):
    """Translate standard number values into identifier entries.

    The number is read from subfield ``a``, falling back to ``b``. The entry
    is marked ``hidden`` whenever subfield ``b`` is non-empty.

    :returns: the updated identifiers list.
    :raises MissingRequiredField: when neither subfield has a value.
    """
    collected = self.get("identifiers", [])
    from_a = clean_val("a", value, str)
    from_b = clean_val("b", value, str)
    number = from_a or from_b
    if not number:
        raise MissingRequiredField(subfield="a or b")
    collected.append({
        "value": number,
        "scheme": "STANDARD_NUMBER",
        "hidden": bool(from_b),
    })
    return collected
Пример #22
0
def copyright(self, key, value):
    """Translate copyright fields.

    Subfield ``3`` is mapped through ``MATERIALS`` (with
    ``raise_exception=True``). Holder, statement, year and url are read from
    subfields ``d``, ``f``, ``g`` and ``u``.
    """
    material_code = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, material_code, raise_exception=True)

    holder = clean_val("d", value, str)
    statement = clean_val("f", value, str)
    year = clean_val("g", value, int)
    url = clean_val("u", value, str)
    return {
        "material": material,
        "holder": holder,
        "statement": statement,
        "year": year,
        "url": url,
    }
Пример #23
0
def collaborations(self, key, value):
    """Translate collaborations into organisation authors.

    For each field value, subfield ``g`` (or ``5`` when ``g`` is absent) is
    appended to ``self["authors"]`` as an ``ORGANISATION`` entry. Values
    with neither subfield are skipped.
    """
    authors = self.get("authors", [])
    for field in force_list(value):
        if "g" in field:
            code = "g"
        elif "5" in field:
            code = "5"
        else:
            continue
        authors.append({
            "full_name": clean_val(code, field, str),
            "type": "ORGANISATION"
        })
    return authors
Пример #24
0
def alternative_identifiers(self, key, value):
    """Translate alternative (external system) identifier fields.

    * ``0247_``: DOIs are delegated to ``dois`` and stored in
      ``self["identifiers"]``, ASINs are ignored and any other scheme is
      rejected. This branch never returns a value.
    * ``035__``: "CERCER" entries are ignored, ``INSPIRE-CNUM`` values are
      moved to ``self["conference_info"]["identifiers"]``, schemes listed in
      ``EXTERNAL_SYSTEM_IDENTIFIERS`` are returned, schemes listed in
      ``EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE`` are ignored and everything
      else is rejected.
    * ``036__``: returned as value/scheme when both ``a`` and ``9`` exist.

    :returns: dict with ``value`` and ``scheme``, or an empty dict.
    :raises IgnoreKey: when the field must not produce an entry.
    :raises UnexpectedValue: for unknown schemes.
    """
    field_type = clean_val("2", value, str)
    sub_a = clean_val("a", value, str, req=True)
    identifier_entry = {}
    if key == "0247_":
        if field_type and field_type.lower() == "doi":
            # if 0247__2 == doi it is a DOI identifier
            self["identifiers"] = dois(self, key, value)
            raise IgnoreKey("alternative_identifiers")
        elif field_type and field_type.lower() == "asin":
            raise IgnoreKey("alternative_identifiers")
        else:
            raise UnexpectedValue(subfield="2")
    if key == "035__":
        if "CERCER" in sub_a:
            raise IgnoreKey("alternative_identifiers")
        sub_9 = clean_val("9", value, str, req=True)
        if "CERCER" in sub_9:
            raise IgnoreKey("alternative_identifiers")
        # conference_info.identifiers mixed data
        if sub_9.upper() == "INSPIRE-CNUM":
            _conference_info = self.get("conference_info", {})
            _prev_identifiers = _conference_info.get("identifiers", [])
            _prev_identifiers.append({
                "scheme": "INSPIRE_CNUM",
                "value": sub_a
            })
            _conference_info.update({"identifiers": _prev_identifiers})
            self["conference_info"] = _conference_info
            raise IgnoreKey("alternative_identifiers")

        elif sub_9.upper() in EXTERNAL_SYSTEM_IDENTIFIERS:
            identifier_entry.update({"value": sub_a, "scheme": sub_9})
        elif sub_9.upper() in EXTERNAL_SYSTEM_IDENTIFIERS_TO_IGNORE:
            # same key name as every other IgnoreKey raised by this rule
            raise IgnoreKey("alternative_identifiers")
        else:
            raise UnexpectedValue(subfield="9")
    if key == "036__":
        if "a" in value and "9" in value:
            identifier_entry.update({
                "value": sub_a,
                "scheme": clean_val("9", value, str, req=True),
            })

    return identifier_entry
Пример #25
0
def serial(self, key, value):
    """Translate a serial field.

    Returns the required title (subfield ``a``), an ``ISSN`` identifier list
    built from subfield ``x`` (``None`` when absent) and the first run of
    digits found in subfield ``v`` as the volume (``None`` when there is
    none).
    """
    issn = clean_val("x", value, str)
    identifiers = [{"scheme": "ISSN", "value": issn}] if issn else None

    volume_numbers = clean_val("v", value, str)
    if volume_numbers:
        volume_numbers = re.findall(r"\d+", volume_numbers)

    title = clean_val("a", value, str, req=True)
    first_volume = volume_numbers[0] if volume_numbers else None
    return {
        "title": title,
        "identifiers": identifiers,
        "volume": first_volume,
    }
Пример #26
0
def alt_authors(self, key, value):
    """Translate the alternative authors field.

    The n-th field value's subfield ``a`` is stored as ``alternative_names``
    on the n-th entry of ``self["authors"]``. Nothing happens when the
    record has no authors.

    :returns: the (possibly updated) authors list.
    """
    authors = self.get("authors", [])
    if not authors:
        return authors
    for position, field in enumerate(force_list(value)):
        # look the author up first so a missing position fails before the
        # subfield is read
        author = authors[position]
        author["alternative_names"] = clean_val("a", field, str)
    return authors
Пример #27
0
def languages(self, key, value):
    """Translate a language field to its two-letter code.

    Subfield ``b`` is lower-cased and looked up with
    ``pycountry.languages.lookup``; the language's ``alpha_2`` code is
    returned.

    :raises UnexpectedValue: when subfield ``b`` is missing, is not a known
        language or has no ``alpha_2`` code.
    """
    lang = clean_val("b", value, str)
    if not lang:
        # a missing subfield would otherwise fail on ``.lower()``
        raise UnexpectedValue(subfield="b")
    try:
        return pycountry.languages.lookup(lang.lower()).alpha_2
    except (KeyError, AttributeError, LookupError):
        # report the subfield that was actually read
        raise UnexpectedValue(subfield="b")
Пример #28
0
def eitem_int_note(self, key, value):
    """Translate the e-item internal note.

    Stores subfield ``a`` as ``internal_notes`` in ``self["_eitem"]`` and
    returns that dict.
    """
    eitem = self.get("_eitem", {})
    eitem["internal_notes"] = clean_val("a", value, str)
    return eitem
Пример #29
0
def arxiv_eprints(self, key, value):
    """Translate arXiv e-print fields.

    Only ``037__`` fields are processed; any other key returns ``None``.
    For each field value the required subfield ``a`` is added to
    ``self["alternative_identifiers"]`` with scheme ``arXiv`` (unless
    already there) and a valid category from subfield ``c`` is added to
    ``self["subjects"]``.

    output:
    {
      'alternative_identifiers': [{'scheme': 'arXiv', 'value': `037__a`}],
    }

    :raises UnexpectedValue: when subfield ``c`` is not in
        ``ARXIV_CATEGORIES``.
    :raises IgnoreKey: always after processing a ``037__`` field.
    """
    def validated_category(field, val):
        """Return the category in ``field``, or None when it is empty."""
        category = clean_val(field, val, str)
        if not category:
            return None
        if category not in ARXIV_CATEGORIES:
            raise UnexpectedValue(subfield=field)
        return category

    if key != "037__":
        return None

    alt_ids = self.get("alternative_identifiers", [])
    for field in force_list(value):
        eprint_id = clean_val("a", field, str, req=True)
        already_known = [
            known for known in alt_ids
            if known["value"] == eprint_id
            and known["scheme"].lower() == "arxiv"
        ]
        category = validated_category("c", field)
        if not already_known:
            alt_ids.append({"value": eprint_id, "scheme": "arXiv"})
            self["alternative_identifiers"] = alt_ids
        if category:
            subjects = self.get("subjects", [])
            subject = {"scheme": "arXiv", "value": category}
            if subject not in subjects:
                subjects.append(subject)
            self["subjects"] = subjects
    raise IgnoreKey("subjects")
Пример #30
0
def licenses(self, key, value):
    """Translate license fields.

    Subfield ``3`` is mapped through ``MATERIALS`` (with
    ``raise_exception=True``). The license url and name come from subfields
    ``u`` and ``a``, and the internal note from subfield ``g``.
    """
    material_code = clean_val("3", value, str, transform="lower")
    material = mapping(MATERIALS, material_code, raise_exception=True)

    license_info = {
        "url": clean_val("u", value, str),
        "name": clean_val("a", value, str),
    }
    internal_note = clean_val("g", value, str)
    return {
        "license": license_info,
        "material": material,
        "internal_note": internal_note,
    }