Exemplo n.º 1
0
def barcode(self, key, value):
    """Translate barcode fields into report numbers or volume barcodes.

    Each entry of ``value`` is handled according to the subfields it carries:

    * ``a`` or ``9`` -- the entry is appended to ``self["identifiers"]`` as a
      ``report_number`` identifier (flagged ``hidden`` when the value came
      from ``9``). Such an entry must not carry ``n`` or ``x``, nor both
      ``a`` and ``9``.
    * ``n`` and ``x`` -- the barcode ``x`` is attached, through
      ``_insert_volume``, to the volume number parsed from ``n``.

    Every entry is processed: a report-number entry no longer aborts the
    loop, so entries listed after it are not silently dropped.

    :raises UnexpectedValue: on a conflicting combination of subfields.
    :raises MissingRequiredField: when ``x`` is given without ``n``, or when
        ``x`` is missing.
    :raises IgnoreKey: always, once all entries were processed; the results
        are stored as side effects on ``self``.
    """
    _migration = self["_migration"]
    for v in force_list(value):
        val_a = clean_val("a", v, str)
        val_n = clean_val("n", v, str)
        val_x = clean_val("x", v, str)
        val_9 = clean_val("9", v, str)
        if val_a or val_9:
            # A report number cannot be mixed with volume/barcode data and
            # cannot be both public (a) and hidden (9) at the same time.
            if val_n or val_x or (val_a and val_9):
                raise UnexpectedValue()
            identifier = {"scheme": "report_number", "value": val_a or val_9}
            if val_9:
                identifier["hidden"] = True
            identifiers = self.get("identifiers", [])
            identifiers.append(identifier)
            self["identifiers"] = identifiers
            # Go on with the next entry; raising IgnoreKey here would skip
            # every remaining entry of the field.
            continue

        if val_n and val_x:
            volume_number = extract_volume_number(
                val_n, raise_exception=True, subfield="n"
            )
            _insert_volume(_migration, volume_number, {"barcode": val_x})
        elif val_x:
            raise MissingRequiredField(
                subfield="n", message=" this record is missing a volume number"
            )
        else:
            raise MissingRequiredField(
                subfield="x",
                message=" this record is missing a barcode number",
            )
    raise IgnoreKey("barcode")
Exemplo n.º 2
0
def isbns(self, key, value):
    """Translate an ISBN field of a multipart record.

    Subfield ``a`` holds the ISBN, ``u`` an optional free-text qualifier and
    ``b``, when present, marks the ISBN as electronic.

    * No ``u``: the ISBN is returned as an identifier of the record.
    * ``u`` parsed by ``extract_volume_info`` or ``extract_volume_number``:
      the ISBN belongs to a volume and is stored through ``_insert_volume``.
    * ``u`` matching ``(set)``: the ISBN is for the whole multipart; the
      text before the marker becomes ``physical_description``.
    * any other ``u``: it is stored as ``physical_description`` as is.

    :returns: an ``{"scheme": "ISBN", "value": ...}`` dict, or ``None`` when
        that identifier is already present in ``self["identifiers"]``.
    :raises IgnoreKey: when the ISBN was attached to a volume instead.
    :raises UnexpectedValue: when neither ``u`` nor ``a`` is provided, or
        when ``u`` seems to contain a volume that could not be parsed.
    """
    _migration = self["_migration"]
    _identifiers = self.get("identifiers", [])

    val_u = clean_val("u", value, str)
    val_a = clean_val("a", value, str)
    val_b = clean_val("b", value, str)

    def _record_isbn():
        """Build the record-level identifier, skipping known duplicates."""
        isbn = {"scheme": "ISBN", "value": val_a}
        return isbn if isbn not in _identifiers else None

    if not val_u:
        # no volume info, only the isbn itself
        if val_a:
            return _record_isbn()
        raise UnexpectedValue(subfield="a", message=" isbn not provided")

    volume_info = extract_volume_info(val_u)
    # if set found it means that the isbn is for the whole multipart
    set_search = re.search(r"(.*?)\(set\.*\)", val_u)

    if volume_info:
        # volume with description: the ISBN is the one of the volume
        volume_obj = {
            "isbn": val_a,
            "physical_description": volume_info["description"].strip(),
            "is_electronic": val_b is not None,
        }
        _insert_volume(_migration, volume_info["volume"], volume_obj)
        raise IgnoreKey("identifiers")

    if set_search:
        self["physical_description"] = set_search.group(1).strip()
        return _record_isbn()

    # Try to find a bare volume number
    volume_number = extract_volume_number(val_u)
    if volume_number:
        # volume, but without description
        volume_obj = {
            "isbn": val_a,
            "is_electronic": val_b is not None,
        }
        _insert_volume(_migration, volume_number, volume_obj)
        raise IgnoreKey("identifiers")
    if extract_volume_number(val_u, search=True):
        raise UnexpectedValue(
            subfield="u",
            message=" found volume but failed to parse description",
        )

    self["physical_description"] = val_u
    return _record_isbn()
Exemplo n.º 3
0
 def wrapper(self, key, value, **kwargs):
     """Call the wrapped rule and drop empty entries from its result.

     The wrapped callable ``f`` is expected to return an iterable of
     dicts. Falsy entries are skipped, falsy values are removed from each
     remaining dict, and dicts left empty are discarded. ``kwargs`` are
     accepted but not forwarded to ``f``.

     :returns: the list of cleaned, non-empty dicts.
     :raises IgnoreKey: when ``f`` returns nothing or nothing survives
         the cleaning.
     """
     result = f(self, key, value)
     if not result:
         raise IgnoreKey(key)

     pruned = [
         {field: content for field, content in entry.items() if content}
         for entry in result
         if entry
     ]
     # a dict holding only falsy values ends up empty: drop it as well
     non_empty = [entry for entry in pruned if entry]
     if non_empty:
         return non_empty
     raise IgnoreKey(key)
Exemplo n.º 4
0
 def proxy(self, key, value, **kwargs):
     """Call the decorated rule and strip whitespace from its result.

     * a string result is returned stripped;
     * a list result has its falsy elements dropped and the remaining
       ones stripped; elements that are empty after stripping are
       dropped too;
     * any other truthy result is returned unchanged.

     :raises IgnoreKey: when the rule returns a falsy result or a list
         with no usable element left.
     """
     res = fn_decorated(self, key, value, **kwargs)
     if not res:
         raise IgnoreKey(key)
     if isinstance(res, str):
         # the value is not checked for empty strings here because clean_val
         # does the job, it will be None caught before
         return res.strip()
     if isinstance(res, list):
         stripped = (elem.strip() for elem in res if elem)
         # filter again after stripping, otherwise whitespace-only
         # elements would be kept as empty strings
         cleaned = [elem for elem in stripped if elem]
         if not cleaned:
             raise IgnoreKey(key)
         return cleaned
     return res
Exemplo n.º 5
0
def related_records(self, key, value):
    """Register a related record in the migration metadata.

    The relation defaults to ``OTHER_RELATION``. For ``775__`` fields that
    carry subfield ``b``, ``b`` becomes the relation description and
    subfield ``x`` ("edition" or "language", case-insensitive) selects the
    relation type. For ``787__`` fields carrying subfield ``i``, ``i`` is
    read with ``manual=True``. The related record id is taken from the
    required subfield ``w``.

    :raises IgnoreKey: always on success; the relation is appended to
        ``self["_migration"]["related"]`` and ``has_related`` is set.
    :raises ManualImportRequired: re-raised with ``subfield`` rewritten to
        ``"b or c"`` for ``775__`` and to ``"i"`` for any other key.
    """
    _migration = self["_migration"]
    _related = _migration["related"]
    relation_type = OTHER_RELATION.name
    relation_description = None
    try:
        if key == "775__" and "b" in value:
            relation_description = clean_val("b", value, str)
            type_tag = clean_val("x", value, str)
            normalized_tag = type_tag.lower() if type_tag else None
            if normalized_tag == "edition":
                relation_type = EDITION_RELATION.name
            elif normalized_tag == "language":
                relation_type = LANGUAGE_RELATION.name

        if key == "787__" and "i" in value:
            # result unused: the call is made only for its checks
            clean_val("i", value, str, manual=True)

        _related.append({
            "related_recid": clean_val("w", value, str, req=True),
            "relation_type": relation_type,
            "relation_description": relation_description,
        })
        _migration.update({"related": _related, "has_related": True})
        raise IgnoreKey("_migration")
    except ManualImportRequired as e:
        e.subfield = "b or c" if key == "775__" else "i"
        raise e
Exemplo n.º 6
0
def isbns(self, key, value):
    """Translate ISBN fields into ``identifiers`` entries.

    For each entry, the ISBN is read from subfield ``a``, falling back to
    ``z``. When subfield ``u`` is present, a volume marker found in it is
    moved to ``self["volume"]`` and the remaining text is mapped through
    ``IDENTIFIERS_MEDIUM_TYPES`` to fill the ``material`` of the ISBN.

    :returns: the identifiers list, extended with the new ISBNs.
    :raises IgnoreKey: as soon as an entry has no ISBN value.
    :raises ManualImportRequired: when a volume marker is found but
        ``self["volume"]`` is already set.
    """
    _isbns = self.get("identifiers", [])
    for field in force_list(value):
        subfield_u = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str) or clean_val("z", field, str)
        if not isbn_value:
            raise IgnoreKey("identifiers")
        isbn = {"value": isbn_value, "scheme": "ISBN"}

        if subfield_u:
            volume_match = re.search(r"(\(*v[.| ]*\d+.*\)*)", subfield_u)
            if volume_match:
                volume = volume_match.group(1)
                # keep only what is left once the volume marker is removed
                subfield_u = subfield_u.replace(volume, "").strip()
                if self.get("volume"):
                    raise ManualImportRequired(subfield="u")
                self["volume"] = volume
            # WARNING! vocabulary document_identifiers_materials
            material = mapping(
                IDENTIFIERS_MEDIUM_TYPES, subfield_u, subfield="u"
            )
            if material:
                isbn["material"] = material

        if isbn not in _isbns:
            _isbns.append(isbn)
    return _isbns
Exemplo n.º 7
0
def arxiv_eprints(self, key, value):
    """Translate arXiv eprint fields.

    Only ``037__`` is handled; for any other key the function returns
    ``None`` without doing anything. For each entry, the eprint id
    (required subfield ``a``) is added to
    ``self["alternative_identifiers"]`` unless an arXiv identifier with the
    same value is already there, and the category (subfield ``c``), when
    given, is added to ``self["subjects"]``.

    output:
    {
      'alternative_identifiers': [{'scheme': 'arXiv', 'value': `037__a`}],
    }

    :raises UnexpectedValue: when the category is not in
        ``ARXIV_CATEGORIES``.
    :raises IgnoreKey: always, once a ``037__`` field was processed.
    """
    def check_category(field, val):
        """Return the category in ``field``, or None when it is not set."""
        category = clean_val(field, val, str)
        if not category:
            return None
        if category not in ARXIV_CATEGORIES:
            raise UnexpectedValue(subfield=field)
        return category

    if key != "037__":
        return None

    _alternative_identifiers = self.get("alternative_identifiers", [])
    for v in force_list(value):
        eprint_id = clean_val("a", v, str, req=True)
        duplicated = [
            known for known in _alternative_identifiers
            if known["value"] == eprint_id
            and known["scheme"].lower() == "arxiv"
        ]
        # validated before anything is stored for this entry
        category = check_category("c", v)
        if not duplicated:
            _alternative_identifiers.append(
                {"value": eprint_id, "scheme": "arXiv"}
            )
            self["alternative_identifiers"] = _alternative_identifiers
        if category:
            _subjects = self.get("subjects", [])
            subject = {"scheme": "arXiv", "value": category}
            if subject not in _subjects:
                _subjects.append(subject)
            self["subjects"] = _subjects
    raise IgnoreKey("subjects")
Exemplo n.º 8
0
def isbns(self, key, value):
    """Translate ISBN fields into ``identifiers`` entries.

    For each entry, the ISBN is read from subfield ``a``, falling back to
    ``z``. When subfield ``u`` is present, a volume marker found in it is
    moved to ``self["volume"]``; the remaining text is stored as the
    ``medium`` of the ISBN when its upper-cased form is in
    ``MEDIUM_TYPES``, and as its ``description`` otherwise.

    :returns: the identifiers list, extended with the new ISBNs.
    :raises IgnoreKey: as soon as an entry has no ISBN value.
    :raises ManualImportRequired: when a volume marker is found but
        ``self["volume"]`` is already set.
    """
    _isbns = self.get("identifiers", [])
    for field in force_list(value):
        subfield_u = clean_val("u", field, str)
        isbn_value = clean_val("a", field, str) or clean_val("z", field, str)
        if not isbn_value:
            raise IgnoreKey("identifiers")
        isbn = {"value": isbn_value, "scheme": "ISBN"}

        if subfield_u:
            volume_match = re.search(r"(\(*v[.| ]*\d+.*\)*)", subfield_u)
            if volume_match:
                volume = volume_match.group(1)
                # keep only what is left once the volume marker is removed
                subfield_u = subfield_u.replace(volume, "").strip()
                if self.get("volume"):
                    raise ManualImportRequired(subfield="u")
                self["volume"] = volume
            is_medium = subfield_u.upper() in MEDIUM_TYPES
            isbn["medium" if is_medium else "description"] = subfield_u

        if isbn not in _isbns:
            _isbns.append(isbn)
    return _isbns
Exemplo n.º 9
0
def open_access(self, key, value):
    """Translate open access.

    When subfield ``a`` equals "open access" (case-insensitive),
    ``self["_eitem"]["open_access"]`` is set to ``True``. A field without
    subfield ``a`` is ignored.

    :raises IgnoreKey: always; the flag is stored as a side effect.
    """
    _open_access = clean_val("a", value, str)
    # guard against a missing subfield before calling .lower()
    if _open_access and _open_access.lower() == "open access":
        _eitem = self.get("_eitem", {})
        _eitem["open_access"] = True
        self["_eitem"] = _eitem
    raise IgnoreKey("open_access")
Exemplo n.º 10
0
def open_access(self, key, value):
    """Flag the eitems of the record as open access.

    ``self["_migration"]["eitems_open_access"]`` is set to ``True`` when
    subfield ``r`` contains "open access" (case-insensitive).

    :raises IgnoreKey: always; the flag is stored as a side effect.
    """
    sub_r = clean_val("r", value, str)
    mentions_open_access = bool(sub_r) and "open access" in sub_r.lower()
    if mentions_open_access:
        _migration = self["_migration"]
        _migration["eitems_open_access"] = True
    raise IgnoreKey("_migration")
Exemplo n.º 11
0
def document_type(self, key, value):
    """Check the document type field.

    Accepted combinations are subfield ``a`` equal to "PERI" for keys
    ``980__``/``690C_`` and equal to "31" for key ``960__``. Only the first
    entry of ``value`` is inspected, because both outcomes leave the loop.

    :raises IgnoreKey: when the first entry is an accepted combination.
    :raises UnexpectedValue: when it is not.
    """
    for v in force_list(value):
        code = clean_val("a", v, str)
        peri_match = key in ("980__", "690C_") and code == "PERI"
        code_31_match = key == "960__" and code == "31"
        if not (peri_match or code_31_match):
            raise UnexpectedValue(subfield="a")
        raise IgnoreKey("document_type")
Exemplo n.º 12
0
def alternative_abstracts(self, key, value):
    """Translate abstract fields.

    The first abstract seen becomes ``self["abstract"]``; later ones are
    returned as alternative abstracts.

    :returns: the abstract from the required subfield ``a``, or ``None``
        when it is already in ``self["alternative_abstracts"]``.
    :raises IgnoreKey: when the value was stored as the main abstract.
    """
    _alternative_abstracts = self.get("alternative_abstracts", [])
    if self.get("abstract", None):
        candidate = clean_val("a", value, str, req=True)
        if candidate in _alternative_abstracts:
            return None
        return candidate

    # takes first abstract as main
    self["abstract"] = clean_val("a", value, str, req=True)
    raise IgnoreKey("alternative_abstracts")
Exemplo n.º 13
0
def open_access(self, key, value):
    """Flag the eitems of the record as open access.

    The mere presence of subfield ``r`` sets
    ``self["_migration"]["eitems_open_access"]`` to ``True``; its content
    is not inspected.

    :raises IgnoreKey: always; the flag is stored as a side effect.
    """
    if "r" not in value:
        raise IgnoreKey("_migration")

    _migration = self["_migration"]
    _migration["eitems_open_access"] = True
    raise IgnoreKey("_migration")
Exemplo n.º 14
0
def children_records(self, key, value):
    """Collect data for children records into the migration metadata.

    For key ``362__`` the content of subfield ``a`` is appended as a
    ``subscription`` entry to ``self["_migration"]["electronic_items"]``.
    The list is written back to the migration metadata for every key.

    :raises IgnoreKey: always; the data is stored as a side effect.
    """
    _migration = self["_migration"]
    items = _migration.get("electronic_items", [])
    if key == "362__":
        items.append({"subscription": clean_val("a", value, str)})
    # store the list even when nothing was added, so the key always exists
    _migration["electronic_items"] = items

    raise IgnoreKey("_children")
Exemplo n.º 15
0
def subject_classification(self, key, value):
    """Translate the subject classification field.

    :returns: an ``ICS`` subject built from the required subfield ``c``.
    :raises IgnoreKey: when that subject is already in
        ``self["subjects"]``.
    """
    known_subjects = self.get("subjects", [])
    classification = {
        "value": clean_val("c", value, str, req=True),
        "scheme": "ICS",
    }
    if classification in known_subjects:
        raise IgnoreKey("subjects")
    return classification
Exemplo n.º 16
0
def subject_classification(self, key, value):
    """Translate subject classification fields.

    The value comes from the required subfield ``a``; the scheme depends
    on the key: ``080__`` -> UDC, ``082*`` -> Dewey, ``084__`` -> ICS,
    ``050*`` -> LoC. Other keys produce a subject without scheme.

    For ``084__``, when subfield ``2`` (upper-cased) is listed in
    ``SUBJECT_CLASSIFICATION_EXCEPTIONS`` the field is handed over to
    ``keywords`` instead.

    :returns: the subject classification dict.
    :raises IgnoreKey: when the field was delegated to ``keywords`` or the
        subject is already in ``self["subjects"]``.
    """
    known_subjects = self.get("subjects", [])
    classification = {"value": clean_val("a", value, str, req=True)}

    scheme = None
    if key == "080__":
        scheme = "UDC"
    elif key.startswith("082"):
        scheme = "Dewey"
    elif key == "084__":
        sub_2 = clean_val("2", value, str)
        if sub_2 and sub_2.upper() in SUBJECT_CLASSIFICATION_EXCEPTIONS:
            keywords(self, key, value)
            raise IgnoreKey("subjects")
        scheme = "ICS"
    elif key.startswith("050"):
        scheme = "LoC"

    if scheme is not None:
        classification["scheme"] = scheme

    if classification in known_subjects:
        raise IgnoreKey("subjects")
    return classification
Exemplo n.º 17
0
def barcodes(self, key, value):
    """Match barcodes of items to volumes.

    Appends a ``{"volume": ..., "barcode": ...}`` entry to
    ``self["_migration"]["volumes"]``, with the volume parsed from subfield
    ``n`` by ``extract_volume_number`` and the barcode read from ``x``.

    :raises IgnoreKey: always; the entry is stored as a side effect.
    """
    val_n = clean_val("n", value, str)
    val_x = clean_val("x", value, str)

    volumes = self["_migration"]["volumes"]
    volumes.append({
        "volume": extract_volume_number(val_n),
        "barcode": val_x,
    })
    raise IgnoreKey("barcodes")
Exemplo n.º 18
0
def created(self, key, value):
    """Translate the creation date of the record.

    Subfield ``x`` is read as an 8-digit number laid out as ``YYYYMMDD``.

    :returns: the date in ISO format (``YYYY-MM-DD``).
    :raises IgnoreKey: when subfield ``x`` yields no value.
    :raises ValueError: from ``datetime.date`` when the digits do not form
        a valid calendar date.
    """
    date_value = clean_val("x", value, int, regex_format=r"\d{8}$")
    if date_value:
        digits = str(date_value)
        # positions 4-5 are the month: they feed date()'s month argument
        year, month, day = digits[:4], digits[4:6], digits[6:8]
        return datetime.date(int(year), int(month), int(day)).isoformat()

    raise IgnoreKey("_created")
Exemplo n.º 19
0
def languages(self, key, value):
    """Translate a language field into an upper-case ISO 639 alpha-3 code.

    The language is read from subfield ``b`` and resolved with
    ``pycountry.languages.lookup``.

    :returns: the alpha-3 code, upper-cased.
    :raises UnexpectedValue: when subfield ``b`` is missing or cannot be
        resolved to a language.
    :raises IgnoreKey: when the language is already in
        ``self["languages"]``.
    """
    _languages = self.get("languages", [])
    lang = clean_val("b", value, str)
    if not lang:
        # a missing subfield used to crash on None.lower()
        raise UnexpectedValue(subfield="b")
    try:
        new_lang = pycountry.languages.lookup(lang.lower()).alpha_3.upper()
    except (KeyError, AttributeError, LookupError):
        # report the subfield that was actually read
        raise UnexpectedValue(subfield="b")
    if new_lang in _languages:
        raise IgnoreKey("languages")
    return new_lang
Exemplo n.º 20
0
def book_series(self, key, value):
    """Record a serial entry in the migration metadata.

    Appends ``{"title": a, "volume": v, "issn": x}`` (named after the
    subfields they are read from) to ``self["_migration"]["serials"]`` and
    sets ``has_serial``.

    :raises IgnoreKey: always; the entry is stored as a side effect.
    """
    # NOTE(review): subfield n is read but its value is never used below
    val_n = clean_val("n", value, str)
    issn = clean_val("x", value, str)

    _migration = self["_migration"]
    serials = _migration["serials"]
    serials.append({
        "title": clean_val("a", value, str),
        "volume": clean_val("v", value, str),
        "issn": issn,
    })
    _migration["has_serial"] = True
    raise IgnoreKey("book_series")
Exemplo n.º 21
0
def number_of_pages(self, key, value):
    """Translate the number of pages field.

    Without subfield ``x``, subfield ``a`` is parsed with
    ``extract_parts``: its physical description, if any, is stored on the
    record and its number of pages is returned.

    With subfield ``x``: "volume" makes the field ignored, "phys.desc." /
    "phys.desc" (case-insensitive) stores ``a`` as physical description,
    and any other content makes the function return ``None``.

    :returns: the number of pages as a string, or ``None`` (see above).
    :raises IgnoreKey: for excluded values and for the ``x`` cases above.
    :raises UnexpectedValue: when ``a`` has extra content or no number of
        pages.
    """
    val_x = clean_val("x", value, str)
    val_a = clean_val("a", value, str)

    if not val_x:
        if is_excluded(val_a):
            raise IgnoreKey("number_of_pages")

        parts = extract_parts(val_a)
        if parts["has_extra"]:
            raise UnexpectedValue(subfield="a")
        if parts["physical_description"]:
            self["physical_description"] = parts["physical_description"]
        if not parts["number_of_pages"]:
            raise UnexpectedValue(subfield="a")
        return str(parts["number_of_pages"])

    if val_x == "volume":
        raise IgnoreKey("number_of_pages")
    if val_x.lower() in ("phys.desc.", "phys.desc"):
        self["physical_description"] = val_a
        raise IgnoreKey("number_of_pages")
    # any other qualifier: nothing is produced
    return None
Exemplo n.º 22
0
def number_of_volumes(self, key, value):
    """Translate the number of volumes.

    Subfield ``a`` is used only when ``extract_parts`` finds no number of
    pages in it and it contains the letter "v"; the first run of digits is
    then returned.

    :returns: the number of volumes, as a string of digits.
    :raises MissingRequiredField: when the record has no ``title`` yet.
    :raises IgnoreKey: when subfield ``a`` is missing or holds no number
        of volumes.
    """
    _series_title = self.get("title", None)
    if not _series_title:
        raise MissingRequiredField(
            subfield="a", message=" this record is missing a main title"
        )
    val_a = clean_val("a", value, str)
    if not val_a:
        # nothing to parse; the membership test below would fail on None
        raise IgnoreKey("number_of_volumes")

    parsed_a = extract_parts(val_a)
    # "vol" always contains "v", so a single membership test is enough
    if not parsed_a["number_of_pages"] and "v" in val_a:
        first_number = re.search(r"\d+", val_a)
        if first_number:
            return first_number.group()
    raise IgnoreKey("number_of_volumes")
Exemplo n.º 23
0
def number_of_pages(self, key, value):
    """Translate the number of pages field.

    Subfield ``a`` is parsed with ``extract_parts``; its physical copy
    description, if any, is stored on the record.

    :returns: the number of pages as a string.
    :raises IgnoreKey: when ``is_excluded`` rejects the value.
    :raises UnexpectedValue: when the value has extra content or no
        number of pages.
    """
    raw = clean_val("a", value, str)
    if is_excluded(raw):
        raise IgnoreKey("number_of_pages")

    parts = extract_parts(raw)
    if parts["has_extra"]:
        raise UnexpectedValue(subfield="a")

    copy_description = parts["physical_copy_description"]
    if copy_description:
        self["physical_copy_description"] = copy_description

    pages = parts["number_of_pages"]
    if not pages:
        raise UnexpectedValue(subfield="a")
    return str(pages)
Exemplo n.º 24
0
def created(self, key, value):
    """Translate creation information.

    * ``916__``: only handled when subfield ``s`` is present. ``s`` is
      mapped through ``ACQUISITION_METHOD`` into
      ``self["created_by"]["type"]``; subfield ``w`` (six digits, year
      followed by week number) is turned into a date by
      ``get_week_start`` and returned in ISO format.
    * ``595__``: subfield ``a`` (three capital letters followed by six
      digits read as year and month) fills ``self["source"]`` and
      ``self["_created"]``. When ``clean_val`` raises ``UnexpectedValue``
      the field is stored through ``internal_notes`` instead.

    :returns: the ISO date for ``916__`` fields that carry a date.
    :raises IgnoreKey: in every other case.
    """
    _created_by = self.get("created_by", {})

    if key == "916__":
        if "s" not in value:
            raise IgnoreKey("_created")
        acquisition_type = mapping(
            ACQUISITION_METHOD,
            clean_val("s", value, str, default="migration"),
            raise_exception=True,
        )
        _created_by.update({"type": acquisition_type})
        self["created_by"] = _created_by

        year_week = clean_val("w", value, int, regex_format=r"\d{6}$")
        if year_week:
            digits = str(year_week)
            week_start = get_week_start(int(digits[:4]), int(digits[4:]))
            return week_start.isoformat()
    elif key == "595__":
        try:
            sub_a = clean_val(
                "a", value, str, regex_format=r"[A-Z]{3}[0-9]{6}$"
            )
            if sub_a:
                self["source"] = sub_a[:3]
                year, month = int(sub_a[3:7]), int(sub_a[7:])
                self["_created"] = datetime.date(year, month, 1).isoformat()
                raise IgnoreKey("_created")
        except UnexpectedValue as e:
            e.subfield = "a"
            # keep the unparsable value as an internal note
            self["internal_notes"] = internal_notes(self, key, value)
            raise IgnoreKey("_created")

    raise IgnoreKey("_created")
Exemplo n.º 25
0
def multivolume_record_format(self, key, value):
    """Store the multivolume kind of the record.

    Subfield ``a`` is translated into the boolean
    ``self["_migration"]["multivolume_record_format"]``:
    "MULTIVOLUMES-1" -> ``True``, "MULTIVOLUMES-X"/"MULTIVOLUMES-x" ->
    ``False``.

    :raises Exception: for "MULTIVOLUMES-MANUAL".
    :raises UnexpectedValue: for any other tag.
    :raises IgnoreKey: always on success; the flag is stored as a side
        effect.
    """
    val_a = clean_val("a", value, str)
    _migration = self["_migration"]

    if val_a == "MULTIVOLUMES-MANUAL":
        raise Exception("This record should not be migrated!")
    if val_a == "MULTIVOLUMES-1":
        parsed = True
    elif val_a in ("MULTIVOLUMES-X", "MULTIVOLUMES-x"):
        parsed = False
    else:
        raise UnexpectedValue(
            subfield="a", message=" unrecognized migration multipart tag"
        )

    _migration["multivolume_record_format"] = parsed
    raise IgnoreKey("multivolume_record_format")
Exemplo n.º 26
0
def special_serials(self, key, value):
    """Translate serial fields.

    Subfield ``a`` of each entry is mapped through ``SERIAL``. A mapped
    title is added once to ``self["_migration"]["serials"]`` (with no
    volume and no issn) and ``has_serial`` is set. An entry that does not
    map is handed over to ``document_type``.

    :returns: the migration metadata.
    :raises IgnoreKey: when an entry does not map to a serial.
    """
    _migration = self["_migration"]
    _serials = _migration.get("serials", [])
    for v in force_list(value):
        result_a = mapping(SERIAL, clean_val("a", v, str))
        if not result_a:
            self["document_type"] = document_type(self, key, value)
            raise IgnoreKey("_migration")

        serial = {"title": result_a, "volume": None, "issn": None}
        # compare whole entries: the list holds dicts, so testing the bare
        # title against it never matched and duplicates piled up
        if serial not in _serials:
            _serials.append(serial)
        _migration.update({"serials": _serials, "has_serial": True})
    return _migration
Exemplo n.º 27
0
def collection(self, key, value):
    """Translate the collection field - WARNING - also document type field.

    Subfields ``a`` and ``b`` of each entry are mapped through
    ``COLLECTION``; every mapped value is added once to
    ``self["_migration"]["tags"]`` and ``has_tags`` is set. An entry where
    neither subfield maps is handed over to ``document_type``.

    :returns: the migration metadata.
    :raises IgnoreKey: when an entry maps to no tag.
    """
    _migration = self["_migration"]
    _tags = _migration["tags"]
    for v in force_list(value):
        candidates = (
            mapping(COLLECTION, clean_val("a", v, str)),
            mapping(COLLECTION, clean_val("b", v, str)),
        )
        mapped_tags = [tag for tag in candidates if tag]
        if not mapped_tags:
            self["document_type"] = document_type(self, key, value)
            raise IgnoreKey("_migration")

        for tag in mapped_tags:
            if tag not in _tags:
                _tags.append(tag)
            _migration["has_tags"] = True
    return _migration
Exemplo n.º 28
0
def project_id(self, key, value):
    """Extract the project id and related links.

    Entries carrying both ``p`` and ``u`` are appended to
    ``self['related_links']`` as ``{'name': p, 'url': u}``. For any other
    entry, ``u`` is taken as the project id; the last such entry wins.

    :returns: the project id.
    :raises IgnoreKey: when no project id was found.
    """
    found_id = None
    related_links = self.get('related_links', [])
    for entry in force_list(value):
        if 'p' in entry and 'u' in entry:
            related_links.append({
                'name': entry.get('p'),
                'url': entry.get('u'),
            })
        else:
            found_id = entry.get('u')

    if related_links:
        self['related_links'] = related_links
    if found_id:
        return found_id
    raise IgnoreKey('project_id')
Exemplo n.º 29
0
def alternative_titles_doc(self, key, value):
    """Translate alternative titles.

    * ``242__``: the titles produced by ``alternative_titles`` are added
      to the existing ones.
    * ``246__`` with ``n`` and ``p``: the field describes a volume; its
      number and title are stored in ``self["_migration"]["volumes"]`` and
      the record is marked as multipart.
    * ``246__`` otherwise: ``a`` is added as ``ALTERNATIVE_TITLE`` and
      ``b`` as ``SUBTITLE``.

    :returns: the list of alternative titles.
    :raises MissingRequiredField: when only one of ``n``/``p`` is present.
    :raises IgnoreKey: when the field was stored as a volume.
    """
    _alternative_titles = self.get("alternative_titles", [])

    if key == "242__":
        _alternative_titles += alternative_titles(self, key, value)
    elif key == "246__":
        # n and p must come together
        if ("n" in value) != ("p" in value):
            raise MissingRequiredField(subfield="n or p")

        if "p" in value:
            _migration = self.get("_migration", {})
            _migration.setdefault("volumes", [])

            val_n = clean_val("n", value, str)
            _migration["volumes"].append({
                "volume": extract_volume_number(val_n, raise_exception=True),
                "title": clean_val("p", value, str),
            })
            _migration["is_multipart"] = True
            _migration["record_type"] = "multipart"
            self["_migration"] = _migration
            raise IgnoreKey("alternative_titles")

        if "a" in value:
            _alternative_titles.append({
                "value": clean_val("a", value, str, req=True),
                "type": "ALTERNATIVE_TITLE",
            })
        if "b" in value:
            _alternative_titles.append({
                "value": clean_val("b", value, str, req=True),
                "type": "SUBTITLE",
            })
    # returned for every key: the 242__ branch used to fall through and
    # return None, losing the titles it had just collected
    return _alternative_titles
Exemplo n.º 30
0
def corporate_authors(self, key, value):
    """Translate the corporate authors field.

    Subfield ``a`` of each entry is appended to the authors as an
    ``ORGANISATION``. A ``710__`` entry without subfield ``a`` is handed
    over to ``collaborations`` instead, which ends the processing.

    :returns: the authors list, extended with the corporate authors.
    :raises IgnoreKey: when the field was delegated to ``collaborations``.
    """
    _corporate_authors = self.get("authors", [])

    for v in force_list(value):
        if key == "710__" and "a" not in v:
            self["authors"] = collaborations(self, key, value)
            raise IgnoreKey("corporate_authors")
        _corporate_authors.append({
            "full_name": clean_val("a", v, str),
            "type": "ORGANISATION",
        })
    return _corporate_authors