def get_contribution_link(bibid, reroid, id, key, value, timeout=60):
    """Get MEF contribution link.

    The IdRef pid is extracted from ``id`` with ``IDREF_REF_REGEX``. For the
    fields 100, 600, 610, 611, 700, 710 and 711 the corresponding MEF
    ``idref`` record is requested to check that it exists.

    :param bibid: bibliographic id, only used in the log messages.
    :param reroid: RERO id, only used in the log messages.
    :param id: raw identifier searched with ``IDREF_REF_REGEX``.
    :param key: MARC field key; its first three characters are the tag.
    :param value: subfields of the field, only used in the log messages.
    :param timeout: seconds to wait for the MEF server before giving up.
    :returns: the MEF url on the production host if the record exists,
        otherwise None.
    """
    prod_host = 'mef.rero.ch'
    # The host that is really queried can be overridden (e.g. a test server).
    test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
    mef_url = 'https://{host}/api/'.format(host=test_host)

    match = IDREF_REF_REGEX.search(id)
    if not match:
        return None
    if key[:3] not in ['100', '600', '610', '611', '700', '710', '711']:
        return None

    # contribution
    url = "{mef}idref/{pid}".format(mef=mef_url, pid=match.group(1))
    try:
        # Without a timeout an unresponsive server would block forever;
        # timeouts are RequestException subclasses and are handled below.
        request = requests.get(url=url, timeout=timeout)
    except requests.exceptions.RequestException as err:
        error_print('ERROR MEF ACCESS:', bibid, reroid, url, err)
        return None
    if request.status_code == requests.codes.ok:
        # The stored link always points to the production host.
        return url.replace(test_host, prod_host)

    subfields = ' '.join(
        '${code} {content}'.format(code=code, content=content)
        for code, content in value.items()
        if code != '__order__'
    )
    field = '{key} {subfields}'.format(key=key, subfields=subfields)
    error_print('WARNING MEF CONTRIBUTION IDREF NOT FOUND:',
                bibid, reroid, field, url, request.status_code)
    return None
def get_language_script(script):
    """Build the language script code.

    The code has the format <lang_code>-<script_code>, for example: chi-hani.
    The <script_code> is received as parameter. The <lang_code> is the first
    language of the record (field 008, then 041$a, then 041$h) that is known
    to be written with this script; 'und' is used when there is none.
    """
    script_languages = {
        'arab': ('ara', 'per'),
        'cyrl': ('bel', 'chu', 'mac', 'rus', 'srp', 'ukr'),
        'grek': ('grc', 'gre'),
        'hani': ('chi', 'jpn'),
        'hebr': ('heb', 'lad', 'yid'),
        'jpan': ('jpn', ),
        'kore': ('kor', ),
        'zyyy': ('chi', )
    }
    accepted_codes = script_languages.get(script)
    if accepted_codes is not None:
        record_codes = (
            [marc21.lang_from_008]
            + marc21.langs_from_041_a
            + marc21.langs_from_041_h
        )
        for code in record_codes:
            if code in accepted_codes:
                return '-'.join([code, script])
        # Known script, but no language of the record matches it.
        error_print('WARNING LANGUAGE SCRIPTS:', marc21.bib_id,
                    script, '008:', marc21.lang_from_008,
                    '041$a:', marc21.langs_from_041_a,
                    '041$h:', marc21.langs_from_041_h)
    return '-'.join(['und', script])
def marc21_to_electronicLocator_from_field_856(self, key, value):
    """Get electronicLocator from field 856.

    One electronic locator is built per subfield $u. The locator type is
    derived from the second indicator (``key[4]``). The first subfield $3
    becomes the ``content`` when it is one of the known content values,
    otherwise it is added to the public notes together with the
    subfields $z.

    :param key: MARC field key: tag followed by the two indicators.
    :param value: subfields of the field.
    :returns: the list of electronic locators collected so far, or None when
        the field has no subfield $u or the list is empty.
    """
    if not value.get('u'):
        return None

    electronic_locator_type = {
        '0': 'resource',
        '1': 'versionOfResource',
        '2': 'relatedResource',
        '8': 'hiddenUrl'
    }
    # NOTE: 'summarization' used to be fused with its neighbour by implicit
    # string concatenation (missing comma) and was never recognised.
    electronic_locator_content = (
        'poster', 'audio', 'postcard', 'addition', 'debriefing',
        'exhibitionDocumentation', 'erratum', 'bookplate', 'extract',
        'educationalSheet', 'illustrations', 'coverImage',
        'deliveryInformation', 'biographicalInformation',
        'introductionPreface', 'classReading', 'teachersKit',
        'publishersNote', 'noteOnContent', 'titlePage', 'photography',
        'summarization', 'onlineResourceViaRERODOC', 'pressReview',
        'webSite', 'tableOfContents', 'fullText', 'video',
    )

    electronic_locators = self.get('electronicLocator', [])
    indicator2 = key[4]

    # Only the first $3 is taken into account.
    content = None
    if value.get('3'):
        content = utils.force_list(value.get('3'))[0]
    known_content = bool(content) and content in electronic_locator_content

    public_note = []
    if content and not known_content:
        # An unknown content value is kept as a public note.
        public_note.append(content)
    if value.get('z'):
        public_note.extend(utils.force_list(value.get('z')))

    for url in utils.force_list(value.get('u')):
        electronic_locator = {
            'url': url,
            'type': electronic_locator_type.get(indicator2, 'noInfo')
        }
        if known_content:
            electronic_locator['content'] = content
        if public_note:
            electronic_locator['publicNote'] = public_note
        # Urls shorter than 7 characters are reported and skipped.
        if len(electronic_locator['url']) >= 7:
            electronic_locators.append(electronic_locator)
        else:
            error_print('WARNING ELECTRONICLOCATOR:', marc21.bib_id,
                        marc21.rero_id, electronic_locator['url'])
    return electronic_locators or None
def get_person_link(bibid, reroid, id, key, value, timeout=60):
    """Get MEF person link.

    ``id`` is expected to look like ``(ORG)PID``. MEF is searched with
    ``<org>.pid:<pid>`` and the link is built from the first hit, using the
    first available source among ``idref``, ``gnd`` and ``rero``.

    :param bibid: bibliographic id, only used in the log messages.
    :param reroid: RERO id, only used in the log messages.
    :param id: identifier of the form ``(ORG)PID``.
    :param key: MARC field key, only used in the log messages.
    :param value: subfields of the field, only used in the log messages.
    :param timeout: seconds to wait for the MEF server before giving up.
    :returns: the MEF link on the production host, or None when no link
        could be built.
    """
    # Example query:
    # https://mef.test.rero.ch/api/mef/?q=rero.rero_pid:A012327677
    prod_host = 'mef.rero.ch'
    # The host that is really queried can be overridden (e.g. a test server).
    test_host = os.environ.get('RERO_ILS_MEF_HOST', 'mef.rero.ch')
    mef_url = 'https://{host}/api/'.format(host=test_host)
    mef_link = None
    try:
        # '(ORG)PID' --> ['ORG', 'PID']
        identifier = id[1:].split(')')
        url = "{mef}mef/?q={org}.pid:{pid}".format(
            mef=mef_url,
            org=identifier[0].lower(),
            pid=identifier[1]
        )
        # Without a timeout an unresponsive server would block forever.
        request = requests.get(url=url, timeout=timeout)
        if request.status_code == requests.codes.ok:
            pid = None
            pid_type = None
            hits = request.json().get('hits', {}).get('hits')
            if hits:
                metadata = hits[0].get('metadata', {})
                # The first source present in the record wins.
                for source_name in ('idref', 'gnd', 'rero'):
                    source = metadata.get(source_name)
                    if source:
                        pid_type = source_name
                        pid = source['pid']
                        break
            if pid:
                mef_link = "{url}{pid_type}/{pid}".format(
                    url=mef_url,
                    pid_type=pid_type,
                    pid=pid
                )
                # The stored link always points to the production host.
                mef_link = mef_link.replace(test_host, prod_host)
        else:
            error_print('ERROR MEF REQUEST:', bibid, reroid, url,
                        request.status_code)
    except requests.exceptions.RequestException as err:
        # Network problems are not a malformed reference: report them as
        # access errors.
        error_print('ERROR MEF ACCESS:', bibid, reroid, url, err)
    except Exception as err:
        # Best effort: anything else (e.g. an identifier that is not of the
        # form '(ORG)PID') is reported and no link is returned.
        error_print('WARNING NOT MEF REF:', bibid, id, key, value, err)
    return mef_link
def marc21_to_language(self, key, value):
    """Get languages.

    languages: 008 and 041 [$a, repetitive]

    As a side effect, a default ``provisionActivity`` built from the dates
    of field 008 is stored in ``self`` when the record has no field 264
    with a second indicator 0, 1, 2 or 3.

    :returns: the list of languages, or None if the list is empty.
    """
    lang_codes = []
    language = self.get('language', [])
    if marc21.lang_from_008:
        language.append({
            'value': marc21.lang_from_008,
            'type': 'bf:Language'
        })
        lang_codes.append(marc21.lang_from_008)
    for lang_value in marc21.langs_from_041_a:
        # Compare the stripped code, as this is the value that is stored;
        # otherwise ' fre' and 'fre' would produce a duplicate.
        lang_code = lang_value.strip()
        if lang_code not in lang_codes:
            language.append({'value': lang_code, 'type': 'bf:Language'})
            lang_codes.append(lang_code)

    # default provisionActivity if we have no 264
    fields_264 = marc21.get_fields(tag='264')
    valid_264 = any(
        field_264['ind2'] in ['0', '1', '2', '3'] for field_264 in fields_264
    )
    if not valid_264:
        if fields_264:
            error_print('WARNING INVALID 264', marc21.bib_id, marc21.rero_id,
                        fields_264)
        self['provisionActivity'] = [{'type': 'bf:Publication'}]
        provision_activity = self['provisionActivity'][0]
        if marc21.date_type_from_008 in ('q', 'n'):
            provision_activity['note'] = 'Date(s) uncertain or unknown'

        start_date = make_year(marc21.date1_from_008)
        if not start_date or start_date > 2050:
            error_print('WARNING START DATE 008:', marc21.bib_id,
                        marc21.rero_id, marc21.date1_from_008)
            start_date = 2050
            provision_activity['note'] = \
                'Date not available and automatically set to 2050'
        provision_activity['startDate'] = start_date

        end_date = make_year(marc21.date2_from_008)
        if end_date:
            if end_date > 2050:
                # Report the second date: it is the one that is rejected.
                error_print('WARNING END DATE 008:', marc21.bib_id,
                            marc21.rero_id, marc21.date2_from_008)
            else:
                provision_activity['endDate'] = end_date

    # No 'und' fallback is applied here: an empty list results in None.
    return language or None
def marc21_to_contribution(self, key, value):
    """Get contribution.

    Handles the fields 100, 700, 710 and 711 whose second indicator
    (``key[4]``) is not '2'.

    The agent is a ``$ref`` link obtained through ``get_person_link`` from
    the subfields $0. When no link could be resolved and a subfield $a
    exists, the agent is described with the subfields of the field instead.

    :param key: MARC field key: tag followed by the two indicators.
    :param value: subfields of the field.
    :returns: a dict with the keys ``agent`` and ``role``, or None when the
        field is not handled or no agent could be built.
    """
    tag = key[:3]
    if key[4] == '2' or tag not in ['100', '700', '710', '711']:
        return None

    agent = {}
    if value.get('0'):
        for ref in utils.force_list(value.get('0')):
            link = get_person_link(
                marc21.bib_id, marc21.rero_id, ref, key, value)
            if link:
                # The last reference that resolves wins.
                agent['$ref'] = link

    # we do not have a $ref
    if not agent.get('$ref') and value.get('a'):
        agent = {'type': 'bf:Person'}
        name = not_repetitive(
            marc21.bib_id, marc21.rero_id, key, value, 'a').rstrip('.')
        if name:
            agent['preferred_name'] = name

        # 100|700 Person
        if tag in ['100', '700']:
            if value.get('b'):
                numeration = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'b')
                numeration = remove_trailing_punctuation(numeration)
                if numeration:
                    agent['numeration'] = numeration
            if value.get('c'):
                qualifier = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'c')
                agent['qualifier'] = remove_trailing_punctuation(qualifier)
            if value.get('d'):
                date = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'd')
                date = date.rstrip(',')
                # '<birth>-<death>'; the death part may be missing.
                dates = remove_trailing_punctuation(date).split('-')
                date_of_birth = dates[0].strip()
                if date_of_birth:
                    agent['date_of_birth'] = date_of_birth
                if len(dates) > 1:
                    date_of_death = dates[1].strip()
                    if date_of_death:
                        agent['date_of_death'] = date_of_death
            if value.get('q'):
                fuller_form_of_name = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'q')
                fuller_form_of_name = remove_trailing_punctuation(
                    fuller_form_of_name).lstrip('(').rstrip(')')
                if fuller_form_of_name:
                    agent['fuller_form_of_name'] = fuller_form_of_name

        # 710|711 Organisation
        elif tag in ['710', '711']:
            agent['type'] = 'bf:Organisation'
            agent['conference'] = tag == '711'
            if value.get('b'):
                agent['subordinate_unit'] = [
                    subordinate_unit.rstrip('.')
                    for subordinate_unit in utils.force_list(value.get('b'))
                ]
            if value.get('e'):
                # $e units are appended after the $b units.
                subordinate_units = agent.get('subordinate_unit', [])
                for subordinate_unit in utils.force_list(value.get('e')):
                    subordinate_units.append(subordinate_unit.rstrip('.'))
                agent['subordinate_unit'] = subordinate_units
            if value.get('n'):
                conference_number = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'n')
                conference_number = remove_trailing_punctuation(
                    conference_number).lstrip('(').rstrip(')')
                if conference_number:
                    agent['conference_number'] = conference_number
            if value.get('d'):
                conference_date = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'd')
                conference_date = remove_trailing_punctuation(
                    conference_date).lstrip('(').rstrip(')')
                if conference_date:
                    agent['conference_date'] = conference_date
            if value.get('c'):
                conference_place = not_repetitive(
                    marc21.bib_id, marc21.rero_id, key, value, 'c')
                conference_place = remove_trailing_punctuation(
                    conference_place).lstrip('(').rstrip(')')
                if conference_place:
                    agent['conference_place'] = conference_place

    if value.get('4'):
        roles = []
        for role in utils.force_list(value.get('4')):
            if len(role) != 3:
                error_print('WARNING CONTRIBUTION ROLE LENGTH:',
                            marc21.bib_id, marc21.rero_id, role)
                role = role[:3]
            if role == 'sce':
                error_print('WARNING CONTRIBUTION ROLE SCE:',
                            marc21.bib_id, marc21.rero_id, 'sce --> aus')
                role = 'aus'
            role = role.lower()
            if role not in _CONTRIBUTION_ROLE:
                error_print('WARNING CONTRIBUTION ROLE DEFINITION:',
                            marc21.bib_id, marc21.rero_id, role)
                role = 'ctb'
            roles.append(role)
    elif tag == '100':
        roles = ['cre']
    elif tag == '711':
        roles = ['aut']
    else:
        roles = ['ctb']

    if not agent:
        return None

    # Remove duplicates but keep the order of first appearance, so that the
    # result does not depend on the iteration order of a set.
    unique_roles = []
    for role in roles:
        if role not in unique_roles:
            unique_roles.append(role)
    return {'agent': agent, 'role': unique_roles}
def marc21_to_type_and_issuance(self, key, value):
    """
    Get document type and the mode of issuance.

    Document type, from the record type and the bibliographic level:
    Books: LDR/6-7: am
    Journals: LDR/6-7: as
    Articles: LDR/6-7: aa
    Scores: LDR/6: c|d
    Videos: LDR/6: g (the 007 field is not checked yet)
    Sounds: LDR/6: i|j
    Anything else: other

    The results are stored in ``self['type']`` and ``self['issuance']``;
    nothing is returned.
    """
    # get the document type
    record_type = marc21.record_type
    doc_type = 'other'
    if record_type == 'a':
        doc_type = {
            'm': 'book',
            's': 'journal',
            'a': 'article',
        }.get(marc21.bib_level, 'other')
    elif record_type in ('c', 'd'):
        doc_type = 'score'
    elif record_type in ('i', 'j'):
        doc_type = 'sound'
    elif record_type == 'g':
        doc_type = 'video'
        # Todo 007
    self['type'] = doc_type

    # get the mode of issuance
    self['issuance'] = {}
    main_type = _ISSUANCE_MAIN_TYPE_PER_BIB_LEVEL.get(
        marc21.bib_level, 'rdami:1001')
    sub_type = 'NOT_DEFINED'
    if marc21.bib_level == 'm':
        if marc21.is_top_level_record:
            main_type = 'rdami:1002'
            sub_type = 'set'
        else:
            sub_type = 'materialUnit'
    elif marc21.bib_level in _ISSUANCE_SUBTYPE_PER_BIB_LEVEL:
        sub_type = _ISSUANCE_SUBTYPE_PER_BIB_LEVEL[marc21.bib_level]
    elif marc21.serial_type in _ISSUANCE_SUBTYPE_PER_SERIAL_TYPE:
        sub_type = _ISSUANCE_SUBTYPE_PER_SERIAL_TYPE[marc21.serial_type]

    # Per main type: the accepted subtypes and the subtype used instead of
    # an unaccepted one.
    subtype_rules = {
        'rdami:1001': (
            ('article', 'materialUnit', 'privateFile', 'privateSubfile'),
            'materialUnit',
        ),
        'rdami:1002': (
            ('set', 'partIndependentTitle', 'partDependantTitle'),
            'set',
        ),
        'rdami:1003': (
            ('serialInSerial', 'monographicSeries', 'periodical'),
            'periodical',
        ),
        'rdami:1004': (
            ('updatingWebsite', 'updatingLoose-leaf'),
            'updatingWebsite',
        ),
    }
    rule = subtype_rules.get(main_type)
    if rule is not None:
        accepted_subtypes, fallback_subtype = rule
        if sub_type not in accepted_subtypes:
            sub_type = fallback_subtype
            error_print('WARNING ISSUANCE:', marc21.bib_id, marc21.rero_id,
                        main_type, sub_type, marc21.bib_level,
                        marc21.serial_type)
    self['issuance'] = {'main_type': main_type, 'subtype': sub_type}