def format(bfo):
    """
    Print Award.

    Reads every '586__a' value (fetched with escape=2) and renders one line
    per non-blank value, joined with "<br>".

    A value without a comma is treated as a bare award name. A value with
    at least one comma is split on its LAST comma into "name" and "year".
    In both cases the name is looked up in the 'awards' knowledge base and
    replaced by the mapped value when a mapping exists.

    @param bfo: the BibFormat object of the record being formatted
    @return: the awards as an HTML string ("" when there is none)
    """
    def _display_name(raw_name):
        # get_kb_mapping hands back the mapping dict on a hit and the
        # supplied default (the raw name itself) on a miss.
        mapping = get_kb_mapping(kb_name='awards', key=raw_name,
                                 default=raw_name)
        if isinstance(mapping, dict):
            return mapping['value']
        return mapping

    total_output = []
    for award_field in bfo.fields('586__a', escape=2):
        if not award_field or not award_field.strip():
            continue

        if ',' not in award_field:
            # Name only. Stop here: falling through to the "name, year"
            # handling would emit the same award a second time as
            # ", <name>".
            total_output.append(_display_name(award_field.strip()))
            continue

        # Everything before the last comma is the name, the rest the year.
        award_name, _comma, award_year = award_field.rpartition(',')
        total_output.append('%s, %s' % (_display_name(award_name),
                                        award_year.strip()))

    return "<br>".join(total_output)
def format(bfo, brief="no"):
    """
    Print the EPFL thesis statement of a record.

    Nothing is printed unless '980__a' is 'THESIS' and '973__a' is 'EPFL'.

    With brief="yes" only the short line "Thèse EPFL, n° <number> (<year>)"
    is returned. Otherwise the long (translated) line is followed by the
    organisational hierarchy, one entry per "<br />": doctoral programme
    ('918__d'), section ('918__b'), school ('918__a'), institute ('918__c'),
    every unit in '919__a' (each resolved through its French knowledge
    base) and finally the jury from '502__a'.

    @param bfo: the BibFormat object of the record being formatted
    @param brief: "yes" to print only the short one-line form
    @return: the HTML string, or '' when the record is not an EPFL thesis
    """
    is_epfl_thesis = (bfo.field("980__a") == 'THESIS'
                      and bfo.field("973__a") == 'EPFL')
    if not is_epfl_thesis:
        return ''

    # Both output forms use the same two values; escape them the same way
    # (previously the year was only escaped in the brief form).
    substitutions = {
        'number': bfo.field('088__a', escape=2),
        'year': bfo.field('920__b', escape=2),
    }

    if brief == "yes":
        return "Thèse EPFL, n° %(number)s (%(year)s)" % substitutions

    # The translation function is only needed for the long form.
    _ = gettext_set_language(bfo.lang)
    output = _(
        "Thèse École polytechnique fédérale de Lausanne EPFL, n° %(number)s (%(year)s)"
    )
    output %= substitutions
    output += '<br />'

    # (MARC field holding the code, knowledge base resolving it), in
    # display order.
    kb_for_tag = (
        ('918__d', 'doctoral-fre'),
        ('918__b', 'section-fre'),
        ('918__a', 'school-fre'),
        ('918__c', 'institute-fre'),
    )
    hierarchy = []
    for tag, kb_name in kb_for_tag:
        code = bfo.field(tag)
        if code:
            hierarchy.append(get_kb_mapping(kb_name=kb_name, key=code))

    hierarchy.extend([
        get_kb_mapping(kb_name='theses-units-fre', key=unit)
        for unit in bfo.fields('919__a')
    ])

    # NOTE(review): the jury text is emitted without escaping -- confirm
    # whether it should be fetched with escape=2 like number and year.
    jury = bfo.field('502__a')
    if jury:
        hierarchy.append({'value': 'Jury: ' + jury})

    # Codes without a knowledge-base entry come back falsy and are skipped.
    output += '<br />'.join([elem['value'] for elem in hierarchy if elem])
    return output
def _tokenize_from_country_name_tag(self, instID):
    """Build country name / country code tokens for record instID.

    Country names are taken from the country name subfield of every
    address field whose secondary-address subfield values do not contain
    "secondary". Each name is UTF-8 encoded and looked up by value in the
    country codes knowledge base; a name contributes two tokens (the name
    and the mapping's key) only when a mapping exists and both are
    non-empty.

    Returns the list of tokens (empty when no name yields a code).
    """
    # Collect the country names of the non-secondary addresses
    primary_names = []
    for address in get_record(instID)[self.address_field]:
        markers = address.get_subfield_values(
            self.secondary_address_subfield)
        if "secondary" in markers:
            continue
        primary_names.extend(
            address.get_subfield_values(self.country_name_subfield))

    encoded_names = [name.encode('utf-8') for name in primary_names]

    found = []
    for name in encoded_names:
        # Find the country code using KB
        mapping = get_kb_mapping(kb_name=self.kb_country_codes, value=name)
        if not mapping:
            continue
        code = mapping["key"]
        if name and code:
            found.extend([name, code])
    return found
def _getter(field):
    """Return one attribute of the project referenced by field.data.

    The project is looked up in the 'json_projects' knowledge base using
    str(field.data); the attribute returned is the one named by key_name
    (taken from the enclosing scope). Returns '' when the field is empty
    or the project is not in the knowledge base.
    """
    if not field.data:
        return ''
    kb_entry = get_kb_mapping('json_projects', str(field.data))
    if not kb_entry:
        return ''
    return json_projects_kb_mapper(kb_entry)['fields'][key_name]
def format_element(bfo, resource_type="photo"):
    """
    Used to put a copyright overlay on top of a resource (photo, video, ..)

    The copyright ('542__') and licence ('540__') taken into account are
    the first ones that apply to the whole record, i.e. the first field
    without an '8' subfield.

    - A licence starting with 'CC-BY' yields credits made of the licence
      name and its URL from the 'LICENSE2URL' knowledge base.
    - Otherwise the credits name the copyright holder (default 'CERN').
      For 'CERN' the URL defaults to CFG_CERN_LICENSE_URL and the overlay
      scripts and stylesheet are appended.

    Missing subfields and a missing knowledge-base entry are rendered as
    empty strings, and every value is escaped before being placed inside
    the single-quoted JavaScript literals.

    @param resource_type: the type of the resource the overlay will be placed on top of
    @return: the HTML/JavaScript snippet, or '' for anything but 'photo'
    """
    if resource_type != 'photo':
        # Overlays for other resource types are not implemented
        return ''

    def _record_level(fields):
        # There might be more than one copyright and licence: select the
        # one that applies to the record, not to a single file (the one
        # without '8' subfield).
        for candidate in fields:
            if not candidate.get('8', None):
                return candidate
        return {}

    def _js_literal(text):
        # The values end up between single quotes inside a <script> block:
        # neutralise what could terminate the literal or the block.
        return (text.replace('\\', '\\\\')
                    .replace("'", "\\'")
                    .replace('</', '<\\/')
                    .replace('\r', '\\r')
                    .replace('\n', '\\n'))

    output = (
        ' <script type="text/javascript">'
        " hs.creditsText = '© %(credit_text)s';"
        " hs.creditsHref = '%(credit_url)s';"
        " hs.creditsTitle = 'The use of photos requires prior authorization from %(credit_text)s';"
        " </script>"
    )

    copyright_field = _record_level(bfo.fields('542__'))
    # A field may lack the subfield altogether: never carry None around.
    copyright_holder = copyright_field.get('d') or ''
    copyright_url = copyright_field.get('u') or ''

    licence = _record_level(bfo.fields('540__')).get('a') or ''

    if licence.startswith('CC-BY'):
        # The knowledge base returns a falsy value for an unknown licence.
        mapping = get_kb_mapping(kb_name='LICENSE2URL', key=licence)
        licence_url = mapping['value'] if mapping else ''
        cc_output = (
            ' <script type="text/javascript">'
            " hs.creditsText = '%(credit_text)s';"
            " hs.creditsHref = '%(credit_url)s';"
            " hs.creditsTitle = '%(credit_text)s';"
            " </script>"
        )
        return cc_output % {'credit_text': _js_literal(licence),
                            'credit_url': _js_literal(licence_url)}

    if not copyright_holder:
        copyright_holder = 'CERN'
    held_by_cern = copyright_holder == 'CERN'

    if held_by_cern and not copyright_url:
        copyright_url = CFG_CERN_LICENSE_URL

    if held_by_cern:
        output += (
            ' <script type="text/javascript" src="/js/overlay.min.js"></script>'
            ' <script type="text/javascript" src="/js/copyright_notice.min.js"></script>'
            ' <link href="/img/overlay.css" type="text/css" rel="stylesheet" /> '
        )

    return output % {'credit_text': _js_literal(copyright_holder),
                     'credit_url': _js_literal(copyright_url)}
def grants_validator(form, field):
    """Validate the grants of a form field and complete them in place.

    Each item of field.data is looked up by str(item['id']) in the
    'json_projects' knowledge base. Known grants get their 'acronym' and
    'title' keys filled from the mapped project; the first unknown grant
    raises ValidationError. An empty field is accepted as is.
    """
    if not field.data:
        return

    for grant in field.data:
        kb_entry = get_kb_mapping('json_projects', str(grant['id']))
        if not kb_entry:
            raise ValidationError(
                "Invalid grant identifier %s" % grant['id'])

        project = json_projects_kb_mapper(kb_entry)
        grant['acronym'] = project['fields']['acronym']
        grant['title'] = project['fields']['title']
def process_recjson(deposition, recjson):
    """
    Process exported recjson (common for both new and edited records)

    The record is modified in place and also returned:

    - communities are reduced from dictionaries to their identifiers;
    - the license identifier is expanded with data from the license
      knowledge base for open/embargoed records and dropped otherwise;
    - journal/imprint years are derived from the publication date;
    - imprint fields and isbn move under 'part_of.*' when the record is
      part of another work;
    - grants are remapped to {title, identifier} dictionaries;
    - empty elements are filtered out.

    @param deposition: not used by this function
    @param recjson: the record to process (must hold 'access_right')
    @return: the processed recjson
    """
    # ===========
    # Communities
    # ===========
    # FIXME: Properly deal with provisional/non-provisional
    try:
        # Extract identifier (i.e. elements are mapped from dict ->
        # string)
        recjson['provisional_communities'] = list(set(
            community['identifier']
            for community in recjson.get('provisional_communities', [])
        ))
        recjson['communities'] = list(set(
            community['identifier']
            for community in recjson.get('communities', [])
        ))
    except TypeError:
        # Happens on re-run: the elements are already plain strings
        pass

    # =================
    # License
    # =================
    if recjson['access_right'] in ["open", "embargoed"]:
        info = get_kb_mapping(CFG_LICENSE_KB, str(recjson['license']))
        if info:
            info = json.loads(info['value'])
            recjson['license'] = dict(
                identifier=recjson['license'],
                source=CFG_LICENSE_SOURCE,
                license=info['title'],
                url=info['url'],
            )
    elif 'license' in recjson:
        del recjson['license']

    # =======================
    # Journal
    # =======================
    # Set year only when a journal title is provided
    if recjson.get('journal.title', None):
        recjson['journal.year'] = recjson['publication_date'].year

    # =======================
    # Book/chapter/report
    # =======================
    if 'imprint.publisher' in recjson and 'imprint.place' in recjson:
        recjson['imprint.year'] = recjson['publication_date'].year

    if 'part_of.title' in recjson:
        mapping = [
            ('part_of.publisher', 'imprint.publisher'),
            ('part_of.place', 'imprint.place'),
            ('part_of.year', 'imprint.year'),
            ('part_of.isbn', 'isbn'),
        ]
        for new, old in mapping:
            if old in recjson:
                # Move the value: the membership test above already
                # guarantees the key exists.
                recjson[new] = recjson.pop(old)

    # =================
    # Grants
    # =================
    # Remap incoming dictionary. Built as a real list so the result does
    # not depend on map() being eager.
    recjson['grants'] = [
        dict(
            title="%s - %s (%s)" % (
                grant['acronym'], grant['title'], grant['id']),
            identifier=grant['id'],
        )
        for grant in recjson.get('grants', [])
    ]

    # =======================
    # Filter out empty fields
    # =======================
    filter_empty_elements(recjson)

    return recjson
def process_recjson(deposition, recjson):
    """
    Process exported recjson (common for both new and edited records)

    Works in place on recjson and returns it. In order: community
    dictionaries are reduced to their identifiers, the license is expanded
    from the license knowledge base (open/embargoed records) or removed
    (any other access right), journal and imprint years are taken from the
    publication date, imprint fields and isbn are moved under 'part_of.*'
    when 'part_of.title' is present, grants are remapped to
    {title, identifier} and empty elements are filtered out.

    @param deposition: not used by this function
    @param recjson: the record to process (must hold 'access_right')
    @return: the processed recjson
    """
    def _identifiers(key):
        # Unique identifiers of the community dictionaries stored under key
        return list(set(
            element['identifier'] for element in recjson.get(key, [])
        ))

    # ===========
    # Communities
    # ===========
    # FIXME: Properly deal with provisional/non-provisional
    try:
        # Extract identifier (i.e. elements are mapped from dict ->
        # string)
        recjson['provisional_communities'] = _identifiers(
            'provisional_communities')
        recjson['communities'] = _identifiers('communities')
    except TypeError:
        # Happens on re-run: the elements are already plain strings
        pass

    # =================
    # License
    # =================
    if recjson['access_right'] in ["open", "embargoed"]:
        info = get_kb_mapping(CFG_LICENSE_KB, str(recjson['license']))
        if info:
            info = json.loads(info['value'])
            recjson['license'] = dict(
                identifier=recjson['license'],
                source=CFG_LICENSE_SOURCE,
                license=info['title'],
                url=info['url'],
            )
    elif 'license' in recjson:
        del recjson['license']

    # =======================
    # Journal
    # =======================
    # Set year only when a journal title is provided
    if recjson.get('journal.title', None):
        recjson['journal.year'] = recjson['publication_date'].year

    # =======================
    # Book/chapter/report
    # =======================
    if 'imprint.publisher' in recjson and 'imprint.place' in recjson:
        recjson['imprint.year'] = recjson['publication_date'].year

    if 'part_of.title' in recjson:
        mapping = [
            ('part_of.publisher', 'imprint.publisher'),
            ('part_of.place', 'imprint.place'),
            ('part_of.year', 'imprint.year'),
            ('part_of.isbn', 'isbn'),
        ]
        for new, old in mapping:
            if old in recjson:
                # The key is known to exist, so a plain move is enough.
                recjson[new] = recjson.pop(old)

    # =================
    # Grants
    # =================
    # Remap incoming dictionary into a concrete list (map() would be lazy
    # on Python 3).
    recjson['grants'] = [
        dict(title="%s - %s (%s)" % (x['acronym'], x['title'], x['id']),
             identifier=x['id'])
        for x in recjson.get('grants', [])
    ]

    # =======================
    # Filter out empty fields
    # =======================
    filter_empty_elements(recjson)

    return recjson
def format_element(bfo, magnify='yes', check_existence='yes', source="auto",
                   display_name="no", display_reference="yes",
                   display_description="yes", display_comment="yes",
                   display_tirage="yes", submission_doctype=""):
    """
    Prints html image and link to photo resources, if 8567 exists print only 8567 otherwise if exists 8564.

    When the record licence ('540__a') starts with 'CC-BY' and its URL is
    known to the 'LICENSE2URL' knowledge base, the whole output is wrapped
    in a <div> stating that licence.

    @param magnify If 'yes', images will be magnified when mouse is over images
    @param check_existence if 'yes' check that file is reachable
    @param source where to look for photos. Possible values are 'mediaarchive', 'doc', 'bibdoc' or 'auto' (check everywhere)
    @param display_name passed on to get_bibdoc_pictures
    @param display_reference passed on to get_bibdoc_pictures
    @param display_description passed on to get_bibdoc_pictures
    @param display_comment passed on to get_bibdoc_pictures
    @param display_tirage passed on to get_bibdoc_pictures
    @param submission_doctype passed on to get_bibdoc_pictures
    """
    out = ""
    rec_is_restricted = bfo.recID in get_all_restricted_recids()
    report_number = bfo.field('037__a')
    licence = bfo.field('540__a')

    # The generic "Conditions of Use" link only applies to records that
    # carry no licence/copyright information of their own and are held
    # by CERN.
    if bfo.field('540__u') or bfo.field('542__u') or \
       bfo.field('542__d') != 'CERN' or licence:
        cond_of_use = ''
    else:
        cond_of_use = '<a href="http://copyright.cern.ch/">Conditions of Use</a> '

    def _copyright_banner():
        # Conditions of use followed by the copyright statement
        return '<center><small><strong>%s%s</strong></small></center><br />' % \
            (cond_of_use, bfe_copyright.format_element(bfo) or '© CERN')

    # Check if image is under creative commons license
    creative_commons = False
    if licence.startswith('CC-BY'):
        # The knowledge base returns a falsy value for an unknown licence;
        # in that case no licence statement can be made, so skip the
        # wrapper instead of failing.
        licence_mapping = get_kb_mapping(kb_name='LICENSE2URL', key=licence)
        if licence_mapping:
            creative_commons = True
            out += '<div about="%s" rev="license">' % licence_mapping['value']

    use_mediaarchive = source in ['auto', 'mediaarchive']
    multimedia = {}
    if use_mediaarchive:
        must_exist = check_existence.lower() == 'yes'
        multimedia = get_media(bfo, check_existence=must_exist)
        # Also append master information to the multimedia structure
        masters = get_media(bfo, path_code='d', internal_note_code='x',
                            check_existence=must_exist)
        for tirage, info in masters.items():
            if tirage in multimedia:
                multimedia[tirage]['master'] = info['master']

    if multimedia and use_mediaarchive:
        out += _copyright_banner()

        if bfo.lang == "fr":
            help_label = 'Besoin d\'aide pour accéder aux photos en haute résolution?'
        else:
            help_label = 'Need help to download high-resolutions?'
        out += '<center><small><a href="%(CFG_SITE_URL)s/help/high-res-multimedia?ln=%(ln)s">%(label)s</a></small></center>' % \
            {'CFG_SITE_URL': CFG_SITE_URL,
             'ln': bfo.lang,
             'label': help_label}

        mediaarchive_pictures = print_images(multimedia=multimedia,
                                             magnify=magnify,
                                             reference=report_number,
                                             bfo=bfo)
        if len(multimedia) > 1 and not rec_is_restricted:
            # we have at least 2 photos
            out += generate_view_button(bfo, report_number,
                                        mediaarchive_pictures)
        else:
            out += mediaarchive_pictures

        # Expand the picture designated by the URL fragment, if any
        out += (
            '<script type="text/javascript">'
            " window.onload = function() {"
            " if (location.hash != ''){"
            " var pic = document.getElementById('thumb'+location.hash.substring(1));"
            " if (pic != null){ hs.expand(pic) } } }"
            " </script>"
        )
    elif source != 'mediaarchive':
        out += _copyright_banner()
        bibdoc_pictures = get_bibdoc_pictures(bfo, display_name,
                                              display_reference,
                                              display_description,
                                              display_comment,
                                              display_tirage,
                                              submission_doctype)
        if bibdoc_pictures and source in ['auto', 'bibdoc']:
            if bibdoc_pictures.count('<img ') > 1 and not rec_is_restricted:
                # we have at least 2 photos
                out += generate_view_button(bfo, report_number,
                                            bibdoc_pictures)
            else:
                out += bibdoc_pictures
        elif source in ['auto', 'doc']:
            # Use picture from doc
            out += get_doc_pictures(bfo)

    if creative_commons:
        out += '</div>'

    return out
def generate_mediaexport(recid, is_image, resource_id, tirage, wrapped, json_format=True):
    """Generates the JSON with the info needed to export a media resource to CERN-Drupal

    Mandatory fields to export: title_en, title_fr, caption_en, caption_fr,
    copyright_holder, copyright_date, attribution (image), keywords (image),
    directors (video), producer (video)

    @param recid: id of the record holding the resource
    @param is_image: true for a photo, false for a video
    @param resource_id: exported as the entry 'id'
    @param tirage: for photos, passed to get_photolab_image_caption for
        MediaArchive records, otherwise the 1-based position of the
        document among the record's documents sorted by id
    @param wrapped: if true, embed the entry as {'entries': [{'entry': ...}]}
    @param json_format: when wrapped, return a JSON string rather than a dict
    @return: the entry dict when not wrapped; otherwise the wrapped
        structure (JSON string or dict), or '' when CFG_JSON_AVAILABLE is
        false
    """
    # NOTE(review): 'license_url' is read from 540 $a and 'license_body'
    # from 540 $u -- presumably what the consumer expects; verify.
    MEDIA_CONFIG = {'title_en': ('245', ' ', ' ', 'a'),
                    'title_fr': ('246', ' ', '1', 'a'),
                    'keywords': ('653', '1', ' ', 'a'),
                    'copyright_holder': ('542', ' ', ' ', 'd'),
                    'copyright_date': ('542', ' ', ' ', 'g'),
                    'license_url': ('540', ' ', ' ', 'a'),
                    'license_desc': ('540', ' ', ' ', 'b'),
                    'license_body': ('540', ' ', ' ', 'u'),
                    'author': ('100', ' ', ' ', 'a'),
                    'affiliation': ('100', ' ', ' ', 'u'),
                    'directors': ('700', ' ', ' ', 'a'),
                    'video_length': ('300', ' ', ' ', 'a'),
                    'language': ('041', ' ', ' ', 'a'),
                    'creation_date': ('269', ' ', ' ', 'c'),
                    'abstract_en': ('520', ' ', ' ', 'a'),
                    'abstract_fr': ('590', ' ', ' ', 'a')}

    entry = {}
    record = get_record(recid)

    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])

    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['type'] = "image" if is_image else "video"
    entry['entry_date'] = get_creation_date(recid)

    # A video asset may point (773 $r) to a parent record, used below as
    # a fallback for author and title information.
    toc_recid = 0
    toc_record = {}
    if not is_image and 'asset' in record_get_field_value(record, *('970', ' ', ' ', 'a')):
        toc_repnum = record_get_field_value(record, *('773', ' ', ' ', 'r'))
        if toc_repnum:
            try:
                toc_recid = search_pattern(p='reportnumber:"%s"' % toc_repnum)[0]
            except IndexError:
                # No record carries that report number
                pass

    #corner cases for copyright & licence
    if not entry['copyright_holder']:
        entry['copyright_holder'] = 'CERN'
    if not entry['license_body']:
        entry['license_body'] = 'CERN'
    if not entry['license_desc']:
        entry['license_desc'] = 'CERN'
    if not entry['license_url']:
        from invenio.bibknowledge import get_kb_mapping
        try:
            entry['license_url'] = get_kb_mapping(kb_name='LICENSE2URL',
                                                  key=entry['license_desc'])['value']
        except (KeyError, TypeError):
            # An unknown licence makes the knowledge base return a
            # non-dict default, which fails with TypeError on ['value'];
            # keep the empty URL in that case.
            pass

    #keywords
    entry['keywords'] = ','.join(record_get_field_values(record, *MEDIA_CONFIG['keywords']))

    #attribution
    if not entry.get('author', '') and not entry.get('attribution', '') and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['author'] = record_get_field_value(toc_record, *MEDIA_CONFIG['author'])
        entry['affiliation'] = record_get_field_value(toc_record, *MEDIA_CONFIG['affiliation'])
        if not entry.get('directors', ''):
            entry['directors'] = ','.join(record_get_field_values(toc_record, *MEDIA_CONFIG['directors']))

    if is_image:
        #photos
        if entry['author']:
            entry['attribution'] = entry['author']
            if entry['affiliation']:
                entry['attribution'] += ': %s' % entry['affiliation']
        del entry['directors']
    else:
        #videos
        if entry['author']:
            entry['producer'] = entry['author']
        # Get all files from record
        files_field = ('856', '7', ' ', 'u')
        # Filter all that are images
        thumbnails = [
            image for image in record_get_field_values(record, *files_field)
            if 'jpg' in image
        ]
        # If exists get the first one
        if thumbnails:
            entry['thumbnail'] = thumbnails[0]

    # Only exported through 'attribution' / 'producer'
    del entry['author']
    del entry['affiliation']

    #title
    if not entry['title_en'] and not entry['title_fr'] and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['title_en'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_en'])
        entry['title_fr'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_fr'])

    #crop, media storage, caption
    if is_image:
        entry['file_params'] = {'size': ['small', 'medium', 'large'], 'crop': False}

        if 'MediaArchive' in record_get_field_values(record, *('856', '7', ' ', '2')):
            entry['caption_en'] = get_photolab_image_caption(record, tirage)
            entry['caption_fr'] = ''
        else:
            brd = BibRecDocs(recid, deleted_too=True)
            doc_numbers = [(bibdoc.get_id(), bibdoc) for bibdoc in brd.list_bibdocs()]
            doc_numbers.sort()
            bibdoc = doc_numbers[tirage-1][1]
            entry['filename'] = brd.get_docname(bibdoc.get_id())
            if any(bibdocfile.get_subformat() == 'crop'
                   for bibdocfile in bibdoc.list_latest_files()):
                entry['file_params']['crop'] = True
            if not bibdoc.deleted_p():
                # Take the captions of the first file that has a comment
                # (or of the last file when none has one).
                for bibdoc_file in bibdoc.list_latest_files():
                    entry['caption_en'] = bibdoc_file.get_comment()
                    entry['caption_fr'] = bibdoc_file.get_description()
                    if entry.get('caption_en', ''):
                        break

    # Fall back on the abstracts for missing captions
    if not entry.get('caption_en', ''):
        entry['caption_en'] = entry['abstract_en']
    if not entry.get('caption_fr', ''):
        entry['caption_fr'] = entry['abstract_fr']

    if is_image:
        del entry['language']
        del entry['video_length']

    # we don't need it
    del entry['abstract_en']
    del entry['abstract_fr']

    #make sure all mandatory fields are sent
    MANDATORY_FIELDS = ['title_en', 'title_fr', 'caption_en', 'caption_fr', 'copyright_holder', 'copyright_date']
    MANDATORY_FIELDS_IMAGE = MANDATORY_FIELDS + ['attribution', 'keywords']
    MANDATORY_FIELDS_VIDEO = MANDATORY_FIELDS + ['directors', 'producer', 'thumbnail']

    if is_image:
        mandatory_fields_all = MANDATORY_FIELDS_IMAGE
    else:
        mandatory_fields_all = MANDATORY_FIELDS_VIDEO

    for field in mandatory_fields_all:
        entry.setdefault(field, '')

    # In case we want to embed the object
    if wrapped:
        final = {}
        final['entries'] = [{'entry': entry}]

        if not CFG_JSON_AVAILABLE:
            return ''

        if json_format:
            return json.dumps(final)
        else:
            return final
    else:
        return entry
def _f1(x):
    """Build the {id, title, acronym} dictionary of project x.

    The project is read from the 'json_projects' knowledge base under the
    key str(x); its value is decoded as JSON. A title or acronym missing
    from the decoded data defaults to ''.
    """
    kb_entry = get_kb_mapping('json_projects', str(x))
    project = json.loads(kb_entry['value'])
    title = project.get('title', '')
    acronym = project.get('acronym', '')
    return dict(id=x, title=title, acronym=acronym)