Python get_kb_mapping примеры, invenio.bibknowledge.get_kb_mapping Python примеры использования

Пример #1

0

Показать файл

Файл: bfe_award.py Проект: epfl-si/invenio-infoscience

def format(bfo):
    """
    Print Award.

    Builds one output line per non-blank value of field ``586__a``.
    A value containing a comma is split on its LAST comma into an award
    name and a year; a value without a comma is treated as a bare award
    name. The name is looked up in the ``awards`` knowledge base with the
    name itself as default; when the lookup returns a dict, its ``'value'``
    entry replaces the name.

    @param bfo: object providing ``fields(tag, escape=...)``
    @return: the lines joined with ``<br>`` (empty string if there are none)
    """
    total_output = []

    for award_field in bfo.fields('586__a', escape=2):
        if not award_field or not award_field.strip():
            continue

        if ',' in award_field:
            # Only the last comma separates the year; any earlier comma
            # belongs to the award name itself.
            award_name, _sep, award_year = award_field.rpartition(',')
            award_year = award_year.strip()
        else:
            # No year present. Previously this case fell through to the
            # "name, year" code below and produced a second, bogus line.
            award_name = award_field.strip()
            award_year = None

        award_name = get_kb_mapping(kb_name='awards',
                                    key=award_name,
                                    default=award_name)
        # The lookup hands back either the default (a string) or a mapping
        # dict, hence the type check.
        if isinstance(award_name, dict):
            award_name = award_name['value']

        if award_year is None:
            total_output.append(award_name)
        else:
            total_output.append('%s, %s' % (award_name, award_year))

    return "<br>".join(total_output)

Пример #2

0

Показать файл

Файл: bfe_thesis_info.py Проект: epfl-si/invenio-infoscience

def format(bfo, brief="no"):
    """
    Print thesis information for EPFL theses.

    Returns an empty string unless field 980__a is 'THESIS' and field
    973__a is 'EPFL'.

    @param bfo: object providing ``field()``, ``fields()`` and ``lang``
    @param brief: if "yes", return only the short one-line form
                  ("Thèse EPFL, n° <number> (<year>)"); otherwise return the
                  long, translated form followed by the knowledge-base labels
                  for fields 918__d, 918__b, 918__a, 918__c, every 919__a
                  value, and the jury (502__a), separated by ``<br />``
    @return: an HTML string
    """
    is_epfl_thesis = (bfo.field("980__a") == 'THESIS'
                      and bfo.field("973__a") == 'EPFL')
    if not is_epfl_thesis:
        return ''

    thesis_number = bfo.field('088__a', escape=2)
    # Escape the year in both output forms (the long form used to insert
    # it unescaped while the brief form escaped it).
    thesis_year = bfo.field('920__b', escape=2)

    if brief == "yes":
        return "Thèse EPFL, n° %(number)s (%(year)s)" % {
            'number': thesis_number,
            'year': thesis_year
        }

    # Only the long form is translated, so the translation function is
    # looked up after the early returns.
    _ = gettext_set_language(bfo.lang)
    output = _(
        "Thèse École polytechnique fédérale de Lausanne EPFL, n° %(number)s (%(year)s)"
    )
    output %= {'number': thesis_number, 'year': thesis_year}
    output += '<br />'

    # (knowledge base, MARC field) pairs, in display order
    single_valued = (
        ('doctoral-fre', '918__d'),
        ('section-fre', '918__b'),
        ('school-fre', '918__a'),
        ('institute-fre', '918__c'),
    )
    hierarchy = []
    for kb_name, tag in single_valued:
        key = bfo.field(tag)
        if key:
            hierarchy.append(get_kb_mapping(kb_name=kb_name, key=key))

    hierarchy.extend([
        get_kb_mapping(kb_name='theses-units-fre', key=unit)
        for unit in bfo.fields('919__a')
    ])

    # NOTE(review): the jury text is inserted without escaping, as in the
    # original - confirm whether it should use escape=2 as well.
    jury = bfo.field('502__a')
    if jury:
        hierarchy.append({'value': 'Jury: ' + jury})

    # Falsy lookup results (no mapping found) are skipped.
    output += '<br />'.join([elem['value'] for elem in hierarchy if elem])
    return output

Пример #3

0

Показать файл

    def _tokenize_from_country_name_tag(self, instID):
        """Collect country name / country code tokens for record instID.

           Reads the country name subfield of every address field that is
           not marked "secondary", then looks each name up by value in the
           country codes knowledge base.
           Returns a flat list [name, code, name, code, ...]; names
           without a mapping contribute nothing.
        """
        record = get_record(instID)

        # Gather the country names of all non-secondary addresses
        primary_names = []
        for address in record[self.address_field]:
            flags = address.get_subfield_values(
                self.secondary_address_subfield)
            if "secondary" in flags:
                continue
            primary_names.extend(
                address.get_subfield_values(self.country_name_subfield))

        encoded_names = [name.encode('utf-8') for name in primary_names]

        tokens = []
        for country_name in encoded_names:
            # Reverse lookup: the name is the KB value, the code is the KB key
            mapping = get_kb_mapping(kb_name=self.kb_country_codes,
                                     value=country_name)
            if not mapping:
                continue
            country_code = mapping["key"]
            if country_name and country_code:
                tokens.extend([country_name, country_code])

        return tokens

Пример #4

0

Показать файл

Файл: zenodo_form.py Проект: mmaggi/zenodo

 def _getter(field):
     if field.data:
         val = get_kb_mapping('json_projects', str(field.data))
         if val:
             data = json_projects_kb_mapper(val)
             return data['fields'][key_name]
     return ''

Пример #5

0

Показать файл

Файл: BibIndexCountryTokenizer.py Проект: fpoli/invenio

    def _tokenize_from_country_name_tag(self, instID):
        """Build country name / country code tokens for record instID.

           Country names come from the country name subfield of each
           address field not flagged as "secondary"; each name is then
           resolved to a code through a by-value lookup in the country
           codes knowledge base.
           Returns a flat list of alternating names and codes; a name
           with no mapping is skipped.
        """
        record = get_record(instID)

        # Country names from addresses that are not marked as secondary
        names = []
        for addr_field in record[self.address_field]:
            markers = addr_field.get_subfield_values(
                self.secondary_address_subfield)
            if "secondary" in markers:
                continue
            names.extend(
                addr_field.get_subfield_values(self.country_name_subfield))

        names = [raw.encode('utf-8') for raw in names]

        tokens = []
        for country_name in names:
            # The knowledge base maps code (key) -> name (value)
            kb_entry = get_kb_mapping(kb_name=self.kb_country_codes,
                                      value=country_name)
            if not kb_entry:
                continue
            country_code = kb_entry["key"]
            if country_name and country_code:
                tokens.extend([country_name, country_code])

        return tokens

Пример #6

0

Показать файл

def format_element(bfo, resource_type="photo"):
    """
    Used to put a copyright overlay on top of a resource (photo, video, ..)

    Only 'photo' is handled; any other resource type yields an empty string.
    For a record-level licence starting with 'CC-BY' the overlay credits the
    licence and links to the URL mapped in the LICENSE2URL knowledge base.
    Otherwise it credits the record-level copyright holder ('CERN' when none
    is recorded).

    @param bfo: object providing ``fields(tag)``
    @param resource_type: the type of the resource the overlay will be placed on top of
    @return: an HTML/JavaScript snippet, or '' for unsupported resource types
    """

    if resource_type != 'photo':
        # not implemented for other resource types
        return ''
    output = """
        <script type="text/javascript">
            hs.creditsText = '© %(credit_text)s';
            hs.creditsHref = '%(credit_url)s';
            hs.creditsTitle = 'The use of photos requires prior authorization from %(credit_text)s';
        </script>"""

    # There might be more than one copyright and licence, select the one that
    # applies to the record, not a single file (one without '8' subfield)
    copyright_holder = ""
    copyright_url = ""
    for copyright_info in bfo.fields('542__'):
        if not copyright_info.get('8', None):
            copyright_holder = copyright_info.get('d')
            copyright_url = copyright_info.get('u')
            break

    licence = ""
    for lic in bfo.fields('540__'):
        if not lic.get('8', None):
            # A 540 field without an 'a' subfield gives None; normalise it
            # so the startswith() check below cannot fail.
            licence = lic.get('a') or ""
            break

    if licence.startswith('CC-BY'):
        # Fall back to an empty link when the knowledge base has no URL
        # for this licence instead of failing on the ['value'] lookup.
        licence_mapping = get_kb_mapping(kb_name='LICENSE2URL', key=licence)
        licence_url = licence_mapping['value'] if licence_mapping else ''
        return """
        <script type="text/javascript">
            hs.creditsText = '%(credit_text)s';
            hs.creditsHref = '%(credit_url)s';
            hs.creditsTitle = '%(credit_text)s';
        </script>""" % {'credit_text': licence,
                        'credit_url' : licence_url}

    if not copyright_holder:
        copyright_holder = 'CERN'

    if copyright_holder == 'CERN' and not copyright_url:
        copyright_url = CFG_CERN_LICENSE_URL

    if copyright_holder == 'CERN':
        # CERN-owned resources additionally load the overlay/notice assets
        output += """
        <script type="text/javascript" src="/js/overlay.min.js"></script>
        <script type="text/javascript" src="/js/copyright_notice.min.js"></script>
        <link href="/img/overlay.css" type="text/css" rel="stylesheet" />
        """
    return output % {'credit_text': copyright_holder,
                      'credit_url': copyright_url}

Пример #7

0

Показать файл

Файл: zenodo_form.py Проект: mmaggi/zenodo

def grants_validator(form, field):
    """Validate grant items and fill in their acronym and title.

    Every item of ``field.data`` is looked up by its 'id' in the
    'json_projects' knowledge base. Items with a mapping get their
    'acronym' and 'title' entries set from it; the first item without a
    mapping raises ValidationError. Does nothing when ``field.data`` is
    empty.
    """
    if not field.data:
        return

    for grant in field.data:
        mapping = get_kb_mapping('json_projects', str(grant['id']))
        if not mapping:
            raise ValidationError("Invalid grant identifier %s" % grant['id'])
        project = json_projects_kb_mapper(mapping)
        grant['acronym'] = project['fields']['acronym']
        grant['title'] = project['fields']['title']

Пример #8

0

Показать файл

Файл: zenodo_metadata.py Проект: gszpura/zenodo

def process_recjson(deposition, recjson):
    """
    Process exported recjson (common for both new and edited records)

    Normalises ``recjson`` in place and returns it:
      - communities are reduced from dicts to their unique identifiers,
      - the license identifier is expanded to a dict using the license
        knowledge base (or dropped when access is not open/embargoed),
      - journal/imprint years are taken from the publication date,
      - imprint fields are moved to ``part_of.*`` when a part_of title exists,
      - grants are remapped to ``{'title', 'identifier'}`` dicts,
      - empty fields are filtered out.

    ``deposition`` is not used by this function.
    """
    # ===========
    # Communities
    # ===========
    # FIXME: Properly deal with provisional/non-provisional
    try:
        # Extract identifier (i.e. elements are mapped from dict ->
        # string)
        recjson['provisional_communities'] = list(set(
            x['identifier']
            for x in recjson.get('provisional_communities', [])
        ))

        recjson['communities'] = list(set(
            x['identifier']
            for x in recjson.get('communities', [])
        ))
    except TypeError:
        # Happens on re-run
        pass

    # =================
    # License
    # =================
    if recjson['access_right'] in ["open", "embargoed"]:
        info = get_kb_mapping(CFG_LICENSE_KB, str(recjson['license']))
        if info:
            # The knowledge base value is a JSON document
            info = json.loads(info['value'])
            recjson['license'] = dict(
                identifier=recjson['license'],
                source=CFG_LICENSE_SOURCE,
                license=info['title'],
                url=info['url'],
            )
    elif 'license' in recjson:
        del recjson['license']

    # =======================
    # Journal
    # =======================
    # Set year or delete fields if no title is provided
    if recjson.get('journal.title', None):
        recjson['journal.year'] = recjson['publication_date'].year

    # =======================
    # Book/chapter/report
    # =======================
    if 'imprint.publisher' in recjson and 'imprint.place' in recjson:
        recjson['imprint.year'] = recjson['publication_date'].year

    if 'part_of.title' in recjson:
        mapping = [
            ('part_of.publisher', 'imprint.publisher'),
            ('part_of.place', 'imprint.place'),
            ('part_of.year', 'imprint.year'),
            ('part_of.isbn', 'isbn'),
        ]
        for new, old in mapping:
            if old in recjson:
                try:
                    recjson[new] = recjson[old]
                    del recjson[old]
                except KeyError:
                    pass

    # =================
    # Grants
    # =================
    # Remap incoming dictionary. Built eagerly as a list so the result is a
    # real sequence on every Python version (map() is lazy on Python 3).
    recjson['grants'] = [
        dict(
            title="%s - %s (%s)" % (x['acronym'], x['title'], x['id']),
            identifier=x['id']
        )
        for x in recjson.get('grants', [])
    ]

    # =======================
    # Filter out empty fields
    # =======================
    filter_empty_elements(recjson)

    return recjson

Пример #9

0

Показать файл

def process_recjson(deposition, recjson):
    """
    Process exported recjson (common for both new and edited records)

    Works on ``recjson`` in place and returns the same object. The steps,
    in order: reduce communities to unique identifiers, expand or drop the
    license, derive journal/imprint years from the publication date, move
    imprint fields under ``part_of.*``, remap grants, and filter out empty
    fields.

    ``deposition`` is accepted but not read here.
    """
    # ===========
    # Communities
    # ===========
    # FIXME: Properly deal with provisional/non-provisional
    try:
        # Extract identifier (i.e. elements are mapped from dict ->
        # string)
        recjson['provisional_communities'] = list(
            set(x['identifier']
                for x in recjson.get('provisional_communities', [])))

        recjson['communities'] = list(
            set(x['identifier'] for x in recjson.get('communities', [])))
    except TypeError:
        # Happens on re-run
        pass

    # =================
    # License
    # =================
    if recjson['access_right'] in ["open", "embargoed"]:
        info = get_kb_mapping(CFG_LICENSE_KB, str(recjson['license']))
        if info:
            # The mapped value is stored as JSON text
            info = json.loads(info['value'])
            recjson['license'] = dict(
                identifier=recjson['license'],
                source=CFG_LICENSE_SOURCE,
                license=info['title'],
                url=info['url'],
            )
    elif 'license' in recjson:
        del recjson['license']

    # =======================
    # Journal
    # =======================
    # Set year or delete fields if no title is provided
    if recjson.get('journal.title', None):
        recjson['journal.year'] = recjson['publication_date'].year

    # =======================
    # Book/chapter/report
    # =======================
    if 'imprint.publisher' in recjson and 'imprint.place' in recjson:
        recjson['imprint.year'] = recjson['publication_date'].year

    if 'part_of.title' in recjson:
        mapping = [
            ('part_of.publisher', 'imprint.publisher'),
            ('part_of.place', 'imprint.place'),
            ('part_of.year', 'imprint.year'),
            ('part_of.isbn', 'isbn'),
        ]
        for new, old in mapping:
            if old in recjson:
                try:
                    recjson[new] = recjson[old]
                    del recjson[old]
                except KeyError:
                    pass

    # =================
    # Grants
    # =================
    # Remap incoming dictionary. A list comprehension is used instead of
    # map() so that a concrete list is stored even where map() is lazy
    # (Python 3).
    recjson['grants'] = [
        dict(title="%s - %s (%s)" % (x['acronym'], x['title'], x['id']),
             identifier=x['id'])
        for x in recjson.get('grants', [])
    ]

    # =======================
    # Filter out empty fields
    # =======================
    filter_empty_elements(recjson)

    return recjson

Пример #10

0

Показать файл

def format_element(bfo, magnify='yes', check_existence='yes', source="auto", display_name="no", display_reference="yes", display_description="yes", display_comment="yes", display_tirage="yes", submission_doctype=""):
    """
    Prints html image and link to photo resources, if 8567 exists print only 8567
    otherwise if exists 8564.
    @param magnify If 'yes', images will be magnified when mouse is over images
    @param check_existence if 'yes' check that file is reachable
    @param source where to look for photos. Possible values are 'mediaarchive', 'doc', 'bibdoc' or 'auto' (check everywhere)
    @param display_name, display_reference, display_description,
           display_comment, display_tirage, submission_doctype: passed
           through unchanged to get_bibdoc_pictures()
    @return the generated HTML
    """
    out = ""

    rec_is_restricted = bfo.recID in get_all_restricted_recids()
    report_number = bfo.field('037__a')

    cond_of_use = '''<a href="http://copyright.cern.ch/">Conditions of Use</a> '''
    if bfo.field('540__u') or bfo.field('542__u') or bfo.field('542__d') != 'CERN' or bfo.field('540__a'):
        cond_of_use = ''

    # Check if image is under creative commons license
    creative_commons = False
    licence_name = bfo.field('540__a')
    if licence_name.startswith('CC-BY'):
        creative_commons = True
        # Use an empty URL when the knowledge base has no entry for this
        # licence instead of failing on the ['value'] lookup.
        licence_mapping = get_kb_mapping(kb_name='LICENSE2URL', key=licence_name)
        licence_url = licence_mapping['value'] if licence_mapping else ''
        out += '<div about="%s" rev="license">' % licence_url

    must_exist = (check_existence.lower() == 'yes')
    multimedia = {}
    if source in ['auto', 'mediaarchive']:
        multimedia = get_media(bfo, check_existence=must_exist)
        # Also append master information to the multimedia structure
        masters = get_media(bfo, path_code='d', internal_note_code='x', check_existence=must_exist)
        for (tirage, info) in masters.items():
            if tirage in multimedia:
                multimedia[tirage]['master'] = info['master']

    if multimedia != {} and source in ['auto', 'mediaarchive']:
        out += '''<center><small><strong>%s%s</strong></small></center><br />''' % (cond_of_use, bfe_copyright.format_element(bfo) or '&copy; CERN')
        if bfo.lang == "fr":
            help_label = 'Besoin d\'aide pour acc&eacute;der aux photos en haute r&eacute;solution?'
        else:
            help_label = 'Need help to download high-resolutions?'
        out += '''<center><small><a href="%(CFG_SITE_URL)s/help/high-res-multimedia?ln=%(ln)s">%(label)s</a></small></center>''' % \
               {'CFG_SITE_URL': CFG_SITE_URL,
                'ln': bfo.lang,
                'label': help_label}
        mediaarchive_pictures = print_images(multimedia=multimedia, magnify=magnify,
                                reference=bfo.field('037__a'), bfo=bfo)
        if len(multimedia) > 1 and not rec_is_restricted: # we have at least 2 photos
            out += generate_view_button(bfo, report_number, mediaarchive_pictures)
        else:
            out += mediaarchive_pictures
        # On page load, expand the thumbnail named by the URL fragment, if any
        out += '''<script type="text/javascript">
        window.onload = function() {
            if (location.hash != ''){
                    var pic = document.getElementById('thumb'+location.hash.substring(1));
                    if (pic != null){
                        hs.expand(pic)
                    }
                }
        }
                </script>'''
    elif source not in ['mediaarchive']:
        out += '''<center><small><strong>%s%s</strong></small></center><br />''' % (cond_of_use, bfe_copyright.format_element(bfo) or '&copy; CERN')

        bibdoc_pictures = get_bibdoc_pictures(bfo, display_name, display_reference,
                                              display_description, display_comment,
                                              display_tirage, submission_doctype)
        if bibdoc_pictures and source in ['auto', 'bibdoc']:
            if bibdoc_pictures.count('<img ') > 1 and not rec_is_restricted: # we have at least 2 photos
                out += generate_view_button(bfo, report_number, bibdoc_pictures)
            else:
                out += bibdoc_pictures
        elif source in ['auto', 'doc']:
            # Use picture from doc
            out += get_doc_pictures(bfo)

    if creative_commons:
        # Close the <div> opened for the licence annotation
        out += '</div>'
    return out

Пример #11

0

Показать файл

def generate_mediaexport(recid, is_image, resource_id, tirage, wrapped, json_format=True):
    """Generates the JSON with the info needed to export a media resource to  CERN-Drupal

    Mandatory fields to export: title_en, title_fr, caption_en, caption_fr,
                                copyright_holder, copyright_date, attribution (image),
                                keywords (image), directors (video), producer (video)

    @param recid: id of the record describing the resource
    @param is_image: true for a photo, false for a video
    @param resource_id: value stored under the 'id' key of the entry
    @param tirage: 1-based index used to pick the image (caption lookup for
                   MediaArchive records, bibdoc selection otherwise)
    @param wrapped: if true, wrap the entry as {'entries': [{'entry': entry}]}
    @param json_format: when wrapped, return a JSON string (True) or the
                        wrapping dict (False)
    @return: the entry dict when not wrapped; otherwise the wrapped structure
             (JSON string or dict), or '' when JSON support is unavailable
    """

    # Entry key -> (tag, ind1, ind2, subfield) of the field it is read from
    MEDIA_CONFIG = {'title_en':         ('245', ' ', ' ', 'a'),
                    'title_fr':         ('246', ' ', '1', 'a'),
                    'keywords':         ('653', '1', ' ', 'a'),
                    'copyright_holder': ('542', ' ', ' ', 'd'),
                    'copyright_date':   ('542', ' ', ' ', 'g'),
                    'license_url':      ('540', ' ', ' ', 'a'),
                    'license_desc':     ('540', ' ', ' ', 'b'),
                    'license_body':     ('540', ' ', ' ', 'u'),
                    'author':           ('100', ' ', ' ', 'a'),
                    'affiliation':      ('100', ' ', ' ', 'u'),
                    'directors':        ('700', ' ', ' ', 'a'),
                    'video_length':     ('300', ' ', ' ', 'a'),
                    'language':         ('041', ' ', ' ', 'a'),
                    'creation_date':    ('269', ' ', ' ', 'c'),
                    'abstract_en':      ('520', ' ', ' ', 'a'),
                    'abstract_fr':      ('590', ' ', ' ', 'a')}

    entry = {}
    record = get_record(recid)

    for key in MEDIA_CONFIG:
        entry[key] = record_get_field_value(record, *MEDIA_CONFIG[key])#.encode('utf-8')

    entry['id'] = resource_id
    entry['record_id'] = str(recid)
    entry['type'] = "image" if is_image else "video"
    entry['entry_date'] = get_creation_date(recid)

    # For video assets, find the record referenced by report number in
    # 773__r; it serves as a fallback source for author and titles below.
    toc_recid = 0
    toc_record = {}
    if not is_image and 'asset' in record_get_field_value(record, '970', ' ', ' ', 'a'):
        toc_repnum = record_get_field_value(record, '773', ' ', ' ', 'r')
        if toc_repnum:
            try:
                toc_recid = search_pattern(p='reportnumber:"%s"' %toc_repnum)[0]
            except IndexError:
                pass

    #corner cases for copyright & licence
    if not entry['copyright_holder']:
        entry['copyright_holder'] = 'CERN'
    if not entry['license_body']:
        entry['license_body'] = 'CERN'
    if not entry['license_desc']:
        entry['license_desc'] = 'CERN'
    if not entry['license_url']:
        from invenio.bibknowledge import get_kb_mapping
        try:
            entry['license_url'] = get_kb_mapping(kb_name='LICENSE2URL', key=entry['license_desc'])['value']
        except (KeyError, TypeError):
            # Best effort: keep the empty URL when there is no usable
            # mapping. TypeError is caught as well because the lookup
            # presumably returns a non-dict default when nothing matches
            # - confirm against get_kb_mapping.
            pass

    #keywords
    entry['keywords'] = ','.join(record_get_field_values(record, *MEDIA_CONFIG['keywords']))

    #attribution
    if not entry.get('author', '') and not entry.get('attribution', '') and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['author'] = record_get_field_value(toc_record, *MEDIA_CONFIG['author'])
        entry['affiliation'] = record_get_field_value(toc_record, *MEDIA_CONFIG['affiliation'])
        if not entry.get('directors', ''):
            entry['directors'] = ','.join(record_get_field_values(toc_record, *MEDIA_CONFIG['directors']))

    #photos
    if is_image:
        # Attribution is "<author>: <affiliation>". When only the
        # affiliation is known it is used alone; appending to a missing
        # 'attribution' key used to raise KeyError in that case.
        if entry['author']:
            entry['attribution'] = entry['author']
        if entry['affiliation']:
            if entry.get('attribution'):
                entry['attribution'] += ': %s' % entry['affiliation']
            else:
                entry['attribution'] = entry['affiliation']
        del entry['directors']
    else: #videos
        if entry['author']:
            entry['producer'] = entry['author']
        # Get all files from record
        files_field = ('856', '7', ' ', 'u')
        # Filter all that are images
        thumbnails = [
            image for image in record_get_field_values(record, *files_field)
            if 'jpg' in image
        ]
        # If exists get the first one
        if thumbnails:
            entry['thumbnail'] = thumbnails[0]

    # author/affiliation were only needed to derive attribution/producer
    del entry['author']
    del entry['affiliation']

    #title
    if not entry['title_en'] and not entry['title_fr'] and toc_recid > 0:
        if not toc_record:
            toc_record = get_record(toc_recid)
        entry['title_en'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_en'])
        entry['title_fr'] = record_get_field_value(toc_record, *MEDIA_CONFIG['title_fr'])

    #crop, media storage, caption
    if is_image:
        entry['file_params'] = {'size': ['small', 'medium', 'large'], 'crop': False}

        if 'MediaArchive' in record_get_field_values(record, '856', '7', ' ', '2'):
            entry['caption_en'] = get_photolab_image_caption(record, tirage)
            entry['caption_fr'] = ''
        else:
            brd = BibRecDocs(recid, deleted_too=True)
            # Order the bibdocs by id; tirage is a 1-based index into that order
            doc_numbers = [(bibdoc.get_id(), bibdoc) for bibdoc in brd.list_bibdocs()]
            doc_numbers.sort()
            bibdoc = doc_numbers[tirage-1][1]
            entry['filename'] = brd.get_docname(bibdoc.get_id()) #bibdoc.get_docname()
            if 'crop' in [bibdocfile.get_subformat() for bibdocfile in bibdoc.list_latest_files()]:
                entry['file_params']['crop'] = True
            if not bibdoc.deleted_p():
                # Take the captions from the first file that has an
                # English caption (comment)
                for bibdoc_file in bibdoc.list_latest_files():
                    entry['caption_en'] = bibdoc_file.get_comment()
                    entry['caption_fr'] = bibdoc_file.get_description()
                    if entry.get('caption_en', ''):
                        break

    # Fall back to the abstracts when no caption was found
    if not entry.get('caption_en', ''):
        entry['caption_en'] = entry['abstract_en']
    if not entry.get('caption_fr', ''):
        entry['caption_fr'] = entry['abstract_fr']

    if is_image:
        del entry['language']
        del entry['video_length']

    # we don't need it
    del entry['abstract_en']
    del entry['abstract_fr']

    #make sure all mandatory fields are sent
    MANDATORY_FIELDS = ['title_en', 'title_fr', 'caption_en', 'caption_fr', 'copyright_holder', 'copyright_date']
    MANDATORY_FIELDS_IMAGE = MANDATORY_FIELDS + ['attribution', 'keywords']
    MANDATORY_FIELDS_VIDEO = MANDATORY_FIELDS + ['directors', 'producer', 'thumbnail']

    if is_image:
        mandatory_fields_all = MANDATORY_FIELDS_IMAGE
    else:
        mandatory_fields_all = MANDATORY_FIELDS_VIDEO

    for field in mandatory_fields_all:
        entry.setdefault(field, '')
    # In case we want to embed the object
    if wrapped:
        final = {}
        final['entries'] = [{'entry': entry}]

        if not CFG_JSON_AVAILABLE:
            return ''

        if json_format:
            return json.dumps(final)
        else:
            return final
    else:
        return entry

Пример #12

0

Показать файл

 def _f1(x):
     """Return id, title and acronym of the project mapped to ``x``.

     The 'json_projects' knowledge base value is parsed as JSON; a missing
     'title' or 'acronym' is reported as ''.
     """
     mapping = get_kb_mapping('json_projects', str(x))
     project = json.loads(mapping['value'])
     return {
         'id': x,
         'title': project.get('title', ''),
         'acronym': project.get('acronym', ''),
     }

Python get_kb_mapping примеры использования