Example #1
0
    def post_process(self, metadata):
        """Post process the data.

        Enrich the record dictionary with formatted title statements and
        resolve ``$ref`` links found in contribution agents.

        :param metadata: dictionary version of a record
        :return: the modified dictionary
        """
        metadata = Document.post_process(metadata)

        titles = metadata.get('title', [])
        head_text = title_format_text_head(titles, with_subtitle=False)
        if head_text:
            metadata['ui_title_text'] = head_text
        statement = metadata.get('responsibilityStatement', [])
        head_text = title_format_text_head(
            titles, statement, with_subtitle=False)
        if head_text:
            metadata['ui_title_text_responsibility'] = head_text

        resolved_contributions = []
        for entry in metadata.get('contribution', []):
            agent = entry['agent']
            # remember the type: the resolved record does not carry it
            agent_kind = agent['type']
            resolved = JsonRef.replace_refs(agent, loader=None).get('metadata')
            if resolved:
                resolved.pop('$schema', None)
                resolved['type'] = agent_kind
                agent = resolved
            resolved_contributions.append({'agent': agent})
        if resolved_contributions:
            metadata['contribution'] = \
                create_contributions(resolved_contributions)
        return metadata
def test_title_format_text_head():
    """Test title format text head."""
    # variant title in another language wins over the plain main title
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Dingding lixianji'},
            {'value': '\u4e01\u4e01\u5386\u9669\u8bb0',
             'language': 'und-hani'},
        ],
    }]
    assert title_format_text_head(titles) == \
        '\u4e01\u4e01\u5386\u9669\u8bb0'

    # subtitle is appended after the main title
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Die russischen orthodoxen Bischöfe von 1893'},
        ],
        'subtitle': [{'value': 'Bio-Bibliographie'}],
    }]
    assert title_format_text_head(titles) == \
        'Die russischen orthodoxen Bischöfe von 1893 : Bio-Bibliographie'

    # an English parallel title takes precedence, subtitle dropped
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Die russischen orthodoxen Bischöfe von 1893'},
            {'value': 'The Russian Orthodox Bishops of 1893',
             'language': 'eng'},
        ],
        'subtitle': [{'value': 'Bio-Bibliographie'}],
    }]
    assert title_format_text_head(titles) == \
        'The Russian Orthodox Bishops of 1893'

    # part names are joined after the subtitle
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [{'value': 'main_title_text'}],
        'subtitle': [{'value': 'subtitle_text'}],
        'part': [
            {'partName': [{'value': 'part1'}, {'value': 'part1.1'}]},
            {'partName': [{'value': 'part2'}]},
        ],
    }]
    assert title_format_text_head(titles) == \
        'main_title_text : subtitle_text. part1, part1.1, part2'
Example #3
0
File: json.py  Project: lauren-d/rero-ils
    def post_process_serialize_search(self, results, pid_fetcher):
        """Post process the search results.

        Enrich each hit with a formatted document title, then resolve the
        display names of the library, location and item type aggregation
        buckets.

        :param results: Elasticsearch search result.
        :param pid_fetcher: Persistent identifier fetcher.
        """
        records = results.get('hits', {}).get('hits', {})

        # add the formatted document title to every hit
        for record in records:
            metadata = record.get('metadata', {})
            document = search_document_by_pid(
                metadata.get('document').get('pid'))
            metadata['ui_title_text'] = title_format_text_head(
                document['title'], with_subtitle=True)

        # Add library name
        for lib_term in results.get('aggregations',
                                    {}).get('library', {}).get('buckets', []):
            lib = Library.get_record_by_pid(lib_term.get('key'))
            lib_term['name'] = lib.get('name')
        # Add location name
        for loc_term in results.get('aggregations',
                                    {}).get('location', {}).get('buckets', []):
            loc = Location.get_record_by_pid(loc_term.get('key'))
            loc_term['name'] = loc.get('name')

        # Add item type name
        for item_type_term in results.get('aggregations',
                                          {}).get('item_type',
                                                  {}).get('buckets', []):
            item_type = ItemType.get_record_by_pid(item_type_term.get('key'))
            item_type_term['name'] = item_type.get('name')

        return super(ItemsJSONSerializer,
                     self).post_process_serialize_search(results, pid_fetcher)
Example #4
0
    def dump(self, record, data):
        """Dump an AcqReceiptLine instance for ElasticSearch.

        For ElasticSearch integration, we need to dump basic informations from
        a `AcqReceiptLine` object instance, and add some basic data about
        related.

        :param record: The record to dump.
        :param data: The initial dump data passed in by ``record.dumps()``.
        """
        # Keep only a subset of truthy attributes from the initial dump.
        attributes = ('pid', 'receipt_date', 'amount', 'quantity', 'vat_rate')
        data.update({
            attr: record.get(attr)
            for attr in attributes
            if record.get(attr)
        })
        notes = record.get('notes', [])
        if notes:
            data['notes'] = [note['content'] for note in notes]

        # Add document informations: pid, formatted title and ISBN identifiers
        # (None values stripped from document metadata).
        document = record.order_line.document
        document_data = {
            'pid': document.pid,
            'title': title_format_text_head(document.get('title', [])),
            'identifiers': document.get_identifier_values(filters=['bf:Isbn'])
        }
        data['document'] = {
            key: value for key, value in document_data.items() if value}
        return data
Example #5
0
File: csv.py  Project: lauren-d/rero-ils
    def preprocess_search_hit(self,
                              pid,
                              record_hit,
                              links_factory=None,
                              **kwargs):
        """Prepare a record hit from Elasticsearch for serialization.

        :param pid: Persistent identifier instance.
        :param record_hit: Record metadata retrieved via search.
        :param links_factory: Factory function for record links.
        """
        record = record_hit['_source']
        item_pid = pid.pid_value

        # resolve the location name through the pre-built map
        locations_map = kwargs.get('locations_map')
        record['location_name'] = \
            locations_map[record.get('location').get('pid')]

        # resolve the related document and format its title
        document = search_document_by_pid(record['document']['pid'])
        record['document_title'] = title_format_text_head(
            document.title, with_subtitle=True)

        # collect creators whose role matches the filter
        creators = []
        for contribution in document.contribution:
            if any(role in contribution.role for role in role_filter):
                try:
                    creators.append(contribution['agent']['preferred_name'])
                except KeyError:
                    creators.append(
                        contribution['agent']['authorized_access_point_en'])
        if creators:
            record['document_creator'] = ' ; '.join(creators)
        record['document_type'] = document.type

        # get loans information
        loans_count, loans = search_active_loans_for_item(item_pid)
        record['loans_count'] = loans_count
        if loans_count:
            # only the first (most relevant) loan is of interest
            last_loan = next(loans)
            record['last_transaction_date'] = format_date_filter(
                last_loan.transaction_date,
                date_format='short',
                locale=current_i18n.locale.language,
            )

        record['created'] = format_date_filter(
            record['_created'],
            date_format='short',
            locale=current_i18n.locale.language,
        )

        # prevent csv key error
        # TODO: find other way to prevent csv key error
        del record['type']

        return record
Example #6
0
def test_title_format_text_head():
    """Test title format text head."""
    # variant title in another language wins over the plain main title
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Dingding lixianji'},
            {'value': '\u4e01\u4e01\u5386\u9669\u8bb0',
             'language': 'und-hani'},
        ],
    }]
    assert title_format_text_head(titles) == \
        '\u4e01\u4e01\u5386\u9669\u8bb0'

    # subtitle is appended after the main title
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Die russischen orthodoxen Bischöfe von 1893'},
        ],
        'subtitle': [{'value': 'Bio-Bibliographie'}],
    }]
    assert title_format_text_head(titles) == \
        'Die russischen orthodoxen Bischöfe von 1893 : Bio-Bibliographie'

    # an English parallel title takes precedence, subtitle dropped
    titles = [{
        'type': 'bf:Title',
        'mainTitle': [
            {'value': 'Die russischen orthodoxen Bischöfe von 1893'},
            {'value': 'The Russian Orthodox Bishops of 1893',
             'language': 'eng'},
        ],
        'subtitle': [{'value': 'Bio-Bibliographie'}],
    }]
    assert title_format_text_head(titles) == \
        'The Russian Orthodox Bishops of 1893'
Example #7
0
    def dump(self, record, data):
        """Dump a document instance with basic document informations.

        :param record: The record to dump.
        :param data: The initial dump data passed in by ``record.dumps()``.
        """
        data['pid'] = record.get('pid')
        data['title_text'] = title_format_text_head(
            record.get('title', []),
            responsabilities=record.get('responsibilityStatement'))
        # strip falsy values before returning
        return {key: value for key, value in data.items() if value}
Example #8
0
File: model.py  Project: rerowep/rero-ils
    def do(self,
           blob,
           ignore_missing=True,
           exception_handlers=None,
           language='fr'):
        """Translate blob values and instantiate new model instance.

        Raises ``MissingRule`` when no rule matched and ``ignore_missing``
        is ``False``.

        :param blob: ``dict``-like object on which the matching rules are
                     going to be applied.
        :param ignore_missing: Set to ``False`` if you prefer to raise
                               an exception ``MissingRule`` for the first
                               key that it is not matching any rule.
        :param exception_handlers: Give custom exception handlers to take care
                                   of non-standard codes that are installation
                                   specific.
        :param language: Language to use.
        """
        self.language = language

        result = super().do(
            blob,
            ignore_missing=ignore_missing,
            exception_handlers=exception_handlers)

        # keep only titles of type bf:Title for the formatted head title
        bf_titles = [
            title for title in blob.get('title', [])
            if title['type'] == 'bf:Title'
        ]
        head_text = title_format_text_head(
            titles=bf_titles,
            responsabilities=blob.get('responsibilityStatement', []),
            with_subtitle=True)
        if head_text:
            result['titles'] = [head_text]

        # prepend a local identifier built from the pid, when present
        pid = blob.get('pid')
        if pid:
            result.get('identifiers', []).insert(0, f'bf:Local|{pid}')
        return result
Example #9
0
    def dump(self, record, data):
        """Dump an AcqOrderLine instance for ElasticSearch.

        For ElasticSearch integration, we need to dump basic informations from
        a `AcqOrderLine` object instance, and add some data from related
        object : related account basic informations and related document basic
        informations.

        :param record: The record to dump.
        :param data: The initial dump data passed in by ``record.dumps()``.
        """
        # Keep only a subset of truthy attributes from the initial dump.
        for attr in ('pid', 'status', 'order_date', 'quantity'):
            value = record.get(attr)
            if value:
                data[attr] = value

        # Related account basic informations: pid, name and reference number
        # (None values stripped from account metadata).
        account = record.account
        account_data = {
            'pid': account.pid,
            'name': account['name'],
            'number': account.get('number')
        }
        data['account'] = {
            key: value for key, value in account_data.items() if value}

        # Related document informations: pid, formatted title and ISBN
        # identifiers (None values stripped from document metadata).
        document = record.document
        document_data = {
            'pid': document.pid,
            'title': title_format_text_head(document.get('title', [])),
            'identifiers': document.get_identifier_values(filters=['bf:Isbn'])
        }
        data['document'] = {
            key: value for key, value in document_data.items() if value}
        return data
Example #10
0
File: csv.py  Project: zannkukai/rero-ils
    def preprocess_search_hit(self,
                              pid,
                              record_hit,
                              links_factory=None,
                              **kwargs):
        """Prepare a record hit from Elasticsearch for serialization.

        :param pid: Persistent identifier instance.
        :param record_hit: Record metadata retrieved via search.
        :param links_factory: Factory function for record links.
        """
        language = kwargs.get('language')
        record = record_hit['_source']

        # inherit holdings call number when possible
        issue_call_number = Item(record).issue_inherited_first_call_number
        if issue_call_number:
            record['call_number'] = issue_call_number
        item_pid = pid.pid_value

        # resolve the location name through the pre-built map
        locations_map = kwargs.get('locations_map')
        record['location_name'] = \
            locations_map[record.get('location').get('pid')]

        # resolve the related document and format its title
        document = search_document_by_pid(record['document']['pid'])
        record['document_title'] = title_format_text_head(
            document.title, with_subtitle=True)

        # collect creators whose role matches the filter, using the
        # language-specific authorized access point
        creators = []
        if 'contribution' in document:
            access_point_key = f'authorized_access_point_{language}'
            for contribution in document.contribution:
                if any(role in contribution.role for role in role_filter):
                    if access_point_key in contribution['agent']:
                        creators.append(
                            contribution['agent'][access_point_key])
        if creators:
            record['document_creator'] = ' ; '.join(creators)

        # flatten the document types into comma-separated strings
        main_types = []
        sub_types = []
        for document_type in document.type:
            type_data = document_type.to_dict()
            main_types.append(type_data.get('main_type'))
            sub_types.append(type_data.get('subtype', ''))
        record['document_main_type'] = ', '.join(main_types)
        record['document_sub_type'] = ', '.join(sub_types)

        # get loans information
        loans_count, loans = search_active_loans_for_item(item_pid)
        record['loans_count'] = loans_count
        if loans_count:
            # only the first loan is of interest
            first_loan = next(loans)
            record['last_transaction_date'] = format_date_filter(
                first_loan.transaction_date,
                date_format='short',
                locale=language,
            )

        record['created'] = format_date_filter(
            record['_created'],
            date_format='short',
            locale=language,
        )

        # prevent csv key error
        # TODO: find other way to prevent csv key error
        del record['type']

        return record
Example #11
0
    def post_process_serialize_search(self, results, pid_fetcher):
        """Post process the search results.

        Enrich each item hit with a formatted document title, availability
        data and resolved organisation/library/location names, then resolve
        the display names of the aggregation buckets.

        :param results: Elasticsearch search result.
        :param pid_fetcher: Persistent identifier fetcher.
        :return: the post-processed search results.
        """
        records = results.get('hits', {}).get('hits', {})
        # per-call caches so each organisation/library/location record is
        # fetched at most once
        orgs = {}
        libs = {}
        locs = {}
        for record in records:
            metadata = record.get('metadata', {})
            document = search_document_by_pid(
                metadata.get('document').get('pid')
            )
            metadata['ui_title_text'] = title_format_text_head(
                document['title'],
                with_subtitle=True
            )

            item = Item.get_record_by_pid(metadata.get('pid'))
            # Store availability under a top-level key before testing it:
            # the previous code read `metadata['available']` without ever
            # writing it, raising a KeyError on every hit.
            metadata['available'] = item.available
            metadata['availability'] = {
                'available': metadata['available'],
                'status': metadata['status'],
                'display_text': item.availability_text,
                'request': item.number_of_requests()
            }
            # expose the due date only for unavailable items currently on loan
            if not metadata['available']:
                if metadata['status'] == ItemStatus.ON_LOAN:
                    metadata['availability']['due_date'] = \
                        item.get_item_end_date(format='long', language='en')

            # Item in collection
            collection = item.in_collection()
            if collection:
                metadata['in_collection'] = collection
            # Organisation
            organisation = metadata['organisation']
            if organisation['pid'] not in orgs:
                orgs[organisation['pid']] = Organisation \
                    .get_record_by_pid(organisation['pid'])
            organisation['viewcode'] = orgs[organisation['pid']].get('code')
            # Library
            library = metadata['library']
            if library['pid'] not in libs:
                libs[library['pid']] = Library \
                    .get_record_by_pid(library['pid'])
            library['name'] = libs[library['pid']].get('name')
            # Location
            location = metadata['location']
            if location['pid'] not in locs:
                locs[location['pid']] = Location \
                    .get_record_by_pid(location['pid'])
            location['name'] = locs[location['pid']].get('name')

        # Add library name
        for lib_term in results.get('aggregations', {}).get(
                'library', {}).get('buckets', []):
            lib = Library.get_record_by_pid(lib_term.get('key'))
            lib_term['name'] = lib.get('name')
        # Add location name
        for loc_term in results.get('aggregations', {}).get(
                'location', {}).get('buckets', []):
            loc = Location.get_record_by_pid(loc_term.get('key'))
            loc_term['name'] = loc.get('name')

        # Add item type name
        for item_type_term in results.get('aggregations', {}).get(
                'item_type', {}).get('buckets', []):
            item_type = ItemType.get_record_by_pid(item_type_term.get('key'))
            item_type_term['name'] = item_type.get('name')

        # Add vendor name
        for vendor_term in results.get('aggregations', {}).get(
                'vendor', {}).get('buckets', []):
            vendor = Vendor.get_record_by_pid(vendor_term.get('key'))
            vendor_term['name'] = vendor.get('name')

        # Correct document type buckets. Guard against a missing
        # aggregation so an empty facet set does not raise a KeyError.
        # (Unreachable duplicate statements that previously followed the
        # return have been removed.)
        if results.get('aggregations', {}).get('document_type'):
            buckets = results['aggregations']['document_type']['buckets']
            results['aggregations']['document_type']['buckets'] = \
                filter_document_type_buckets(buckets)

        return super().post_process_serialize_search(results, pid_fetcher)
Example #12
0
    def post_process_serialize_search(self, results, pid_fetcher):
        """Post process the search results.

        Enriches the aggregation buckets with display names, then adds a
        formatted document title, availability data and resolved location
        names to every hit.

        :param results: Elasticsearch search result.
        :param pid_fetcher: Persistent identifier fetcher.
        """
        records = results.get('hits', {}).get('hits', {})
        # per-call caches so each organisation/library/location record is
        # fetched at most once across all hits
        orgs = {}
        libs = {}
        locs = {}

        # enrich library bucket
        JSONSerializer.enrich_bucket_with_data(results, 'library',
                                               LibrariesSearch, 'name')
        # enrich location bucket
        JSONSerializer.enrich_bucket_with_data(results, 'location',
                                               LocationsSearch, 'name')
        # enrich item type bucket
        JSONSerializer.enrich_bucket_with_data(results, 'item_type',
                                               ItemTypesSearch, 'name')
        # enrich temporary item type bucket
        JSONSerializer.enrich_bucket_with_data(results, 'temporary_item_type',
                                               ItemTypesSearch, 'name')
        # enrich temporary location bucket
        JSONSerializer.enrich_bucket_with_data(results, 'temporary_location',
                                               LocationsSearch, 'name')
        # enrich vendor bucket
        JSONSerializer.enrich_bucket_with_data(results, 'vendor',
                                               VendorsSearch, 'name')

        for record in records:
            metadata = record.get('metadata', {})
            # formatted title from the related document record
            document = search_document_by_pid(
                metadata.get('document').get('pid'))
            metadata['ui_title_text'] = title_format_text_head(
                document['title'], with_subtitle=True)

            item = Item.get_record_by_pid(metadata.get('pid'))
            metadata['available'] = item.available
            metadata['availability'] = {
                'available': metadata['available'],
                'status': metadata['status'],
                'display_text': item.availability_text,
                'request': item.number_of_requests()
            }
            # expose the due date only for unavailable items currently
            # on loan
            if not metadata['available']:
                if metadata['status'] == ItemStatus.ON_LOAN:
                    metadata['availability']['due_date'] =\
                        item.get_item_end_date(format='long', language='en')

            # Item in collection
            collection = item.in_collection()
            if collection:
                metadata['in_collection'] = collection

            # Temporary location (optional; shares the `locs` cache with
            # the regular location lookup below)
            temp_location = metadata.get('temporary_location')
            if temp_location:
                temp_location_pid = temp_location['pid']
                if temp_location_pid not in locs:
                    locs[temp_location_pid] = Location \
                        .get_record_by_pid(temp_location_pid)
                temp_location['name'] = locs[temp_location_pid].get('name')

            # Organisation
            organisation = metadata['organisation']
            if organisation['pid'] not in orgs:
                orgs[organisation['pid']] = Organisation \
                    .get_record_by_pid(organisation['pid'])
            organisation['viewcode'] = orgs[organisation['pid']].get('code')
            # Library
            library = metadata['library']
            if library['pid'] not in libs:
                libs[library['pid']] = Library \
                    .get_record_by_pid(library['pid'])
            library['name'] = libs[library['pid']].get('name')
            # Location
            location = metadata['location']
            if location['pid'] not in locs:
                locs[location['pid']] = Location \
                    .get_record_by_pid(location['pid'])
            location['name'] = locs[location['pid']].get('name')

        # Correct document type buckets
        if results.get('aggregations', {}).get('document_type'):
            buckets = results['aggregations']['document_type']['buckets']
            results['aggregations']['document_type']['buckets'] = \
                filter_document_type_buckets(buckets)

        return super().post_process_serialize_search(results, pid_fetcher)