Exemplo n.º 1
0
def get_records(**kwargs):
    """Get records paginated.

    Runs a new scrolled search for the first page, or continues an existing
    scroll when the resumption token carries a ``scroll_id``.

    :param kwargs: Request arguments. The keys read here are
        ``resumptionToken`` (a dict with optional ``page`` and ``scroll_id``),
        ``set``, ``from_`` and ``until``.
    :returns: A ``Pagination`` object exposing ``page``, ``per_page``,
        ``total``, ``has_next``, ``next_num``, ``items`` and ``_scroll_id``.
    """
    token = kwargs.get('resumptionToken', {})
    page_ = token.get('page', 1)
    size_ = current_app.config['OAISERVER_PAGE_SIZE']
    scroll = current_app.config['OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME']
    scroll_id = token.get('scroll_id')
    # The configured expiry is passed to the search backend as seconds.
    scroll_ttl = '{0}s'.format(scroll)

    if scroll_id is None:
        search = OAIServerSearch(
            index=current_app.config['OAISERVER_RECORD_INDEX'],
        ).params(
            scroll=scroll_ttl,
        ).extra(
            version=True,
        )[(page_ - 1) * size_:page_ * size_]

        if 'set' in kwargs:
            search = search.query('match', **{'_oai.sets': kwargs['set']})

        time_range = {}
        if 'from_' in kwargs:
            time_range['gte'] = kwargs['from_']
        if 'until' in kwargs:
            time_range['lte'] = kwargs['until']
        if time_range:
            search = search.filter('range', **{'_updated': time_range})

        response = search.execute().to_dict()
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll=scroll_ttl,
        )

    class Pagination(object):
        """Minimal pagination wrapper around a raw search response."""

        page = page_
        per_page = size_

        def __init__(self, response):
            """Initialize pagination from the response dictionary."""
            self.response = response
            total = response['hits']['total']
            # Newer Elasticsearch versions report the total as an object
            # ({'value': N, ...}) instead of a plain integer.
            self.total = total['value'] if isinstance(total, dict) else total
            self._scroll_id = response.get('_scroll_id')

            # Release the scroll context on the last page. Skip the call when
            # the response carried no scroll id, since there is nothing to
            # clear in that case.
            if not self.has_next:
                if self._scroll_id:
                    current_search_client.clear_scroll(
                        scroll_id=self._scroll_id
                    )
                self._scroll_id = None

        @cached_property
        def has_next(self):
            """Return True if there is next page."""
            # Strict comparison: when the pages seen so far cover exactly
            # ``total`` hits there is nothing left to fetch.
            return self.page * self.per_page < self.total

        @cached_property
        def next_num(self):
            """Return next page number, or None on the last page."""
            return self.page + 1 if self.has_next else None

        @property
        def items(self):
            """Yield ``id``/``json``/``updated`` dicts for hits with ``_oai``."""
            from datetime import datetime
            for result in self.response['hits']['hits']:
                source = result['_source']
                # Hits without OAI metadata are skipped.
                if '_oai' not in source:
                    continue
                yield {
                    'id': result['_id'],
                    'json': result,
                    # Only the first 19 characters (second precision) of the
                    # timestamp are parsed; any suffix is dropped.
                    'updated': datetime.strptime(
                        source['_updated'][:19],
                        '%Y-%m-%dT%H:%M:%S'
                    ),
                }

    return Pagination(response)
Exemplo n.º 2
0
Arquivo: query.py Projeto: mhaya/weko
def get_records(**kwargs):
    """Get records paginated.

    Runs a new scrolled search for the first page, or continues an existing
    scroll when the resumption token carries a ``scroll_id``.

    :param kwargs: Request arguments. The keys read here are
        ``resumptionToken`` (a dict with optional ``page`` and ``scroll_id``),
        ``set``, ``from_`` and ``until``.
    :returns: A ``Pagination`` object exposing ``page``, ``per_page``,
        ``total``, ``has_next``, ``next_num``, ``items`` and ``_scroll_id``.
    """
    token = kwargs.get('resumptionToken', {})
    page_ = token.get('page', 1)
    size_ = current_app.config['OAISERVER_PAGE_SIZE']
    scroll = current_app.config['OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME']
    scroll_id = token.get('scroll_id')
    # The configured expiry is passed to the search backend as seconds.
    scroll_ttl = '{0}s'.format(scroll)

    if scroll_id is None:
        search = OAIServerSearch(
            index=current_app.config['OAISERVER_RECORD_INDEX'],
        ).params(
            scroll=scroll_ttl,
        ).extra(
            version=True,
        )[(page_ - 1) * size_:page_ * size_]

        if 'set' in kwargs:
            search = search.query('match', **{'_oai.sets': kwargs['set']})

        time_range = {}
        if 'from_' in kwargs:
            time_range['gte'] = kwargs['from_']
        if 'until' in kwargs:
            time_range['lte'] = kwargs['until']
        if time_range:
            search = search.filter('range', **{'_updated': time_range})

        response = search.execute().to_dict()
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll=scroll_ttl,
        )

    class Pagination(object):
        """Minimal pagination wrapper around a raw search response."""

        page = page_
        per_page = size_

        def __init__(self, response):
            """Initialize pagination from the response dictionary."""
            self.response = response
            total = response['hits']['total']
            # Newer Elasticsearch versions report the total as an object
            # ({'value': N, ...}) instead of a plain integer.
            self.total = total['value'] if isinstance(total, dict) else total
            self._scroll_id = response.get('_scroll_id')

            # Release the scroll context on the last page. Skip the call when
            # the response carried no scroll id, since there is nothing to
            # clear in that case.
            if not self.has_next:
                if self._scroll_id:
                    current_search_client.clear_scroll(
                        scroll_id=self._scroll_id
                    )
                self._scroll_id = None

        @cached_property
        def has_next(self):
            """Return True if there is next page."""
            # Strict comparison: when the pages seen so far cover exactly
            # ``total`` hits there is nothing left to fetch.
            return self.page * self.per_page < self.total

        @cached_property
        def next_num(self):
            """Return next page number, or None on the last page."""
            return self.page + 1 if self.has_next else None

        @property
        def items(self):
            """Yield ``id``/``json``/``updated`` dicts for hits with ``_oai``."""
            from datetime import datetime
            for result in self.response['hits']['hits']:
                source = result['_source']
                # Hits without OAI metadata are skipped.
                if '_oai' not in source:
                    continue
                yield {
                    'id': result['_id'],
                    'json': result,
                    # Only the first 19 characters (second precision) of the
                    # timestamp are parsed; any suffix is dropped.
                    'updated': datetime.strptime(
                        source['_updated'][:19],
                        '%Y-%m-%dT%H:%M:%S'
                    ),
                }

    return Pagination(response)
Exemplo n.º 3
0
def get_records(**kwargs):
    """Get records paginated.

    Runs a new scrolled search for the first page, or continues an existing
    scroll when the resumption token carries a ``scroll_id``. The first-page
    search is restricted to the latest record versions and to harvested
    index paths. It also excludes records that have a DOI and sit in an
    index whose public date is still in the future.

    :param kwargs: Request arguments. The keys read here are
        ``resumptionToken`` (a dict with optional ``page`` and ``scroll_id``),
        ``set``, ``from_`` and ``until``.
    :returns: A ``Pagination`` object exposing ``page``, ``per_page``,
        ``total``, ``has_next``, ``next_num``, ``items`` and ``_scroll_id``.
    """
    from datetime import datetime

    from weko_index_tree.api import Indexes

    def index_ids_has_future_date():
        """Return ids of public, harvestable indexes not yet published."""
        query = Index.query.filter(
            Index.public_state.is_(True),
            Index.public_date > datetime.now(),
            Index.harvest_public_state.is_(True)
        )
        return {index.id for index in query.all() or []}

    def get_records_has_doi():
        """Return the RecordMetadata rows that have a DOI identifier."""
        pids = PersistentIdentifier.query.filter(
            PersistentIdentifier.pid_type == 'doi'
        ).all() or []
        object_uuids = [pid.object_uuid for pid in pids]
        if not object_uuids:
            # Nothing to look up; avoid issuing an empty IN () query.
            return []
        return RecordMetadata.query.filter(
            RecordMetadata.id.in_(object_uuids)
        ).all() or []

    def add_condition_doi_and_future_date(query):
        """Return ``query`` with DOI records in future-dated indexes excluded.

        Search objects are immutable: ``post_filter`` returns a new object, so
        the extended query must be returned to, and used by, the caller.
        """
        index_ids = index_ids_has_future_date()
        if not index_ids:
            # No index qualifies, so no record can match; skip the DOI scan.
            return query
        for record in get_records_has_doi():
            paths = record.json.get('path', [])
            # NOTE(review): membership compares path entries with Index.id
            # values as-is; confirm both sides use the same type.
            if any(path in index_ids for path in paths):
                query = query.post_filter(
                    'bool',
                    **{'must_not': [
                        {'term': {'_id': str(record.id)}}]})
        return query

    token = kwargs.get('resumptionToken', {})
    page_ = token.get('page', 1)
    size_ = current_app.config['OAISERVER_PAGE_SIZE']
    scroll = current_app.config['OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME']
    scroll_id = token.get('scroll_id')
    # The configured expiry is passed to the search backend as seconds.
    scroll_ttl = '{0}s'.format(scroll)

    if not scroll_id:
        search = OAIServerSearch(
            index=current_app.config['INDEXER_DEFAULT_INDEX'],
        ).params(
            scroll=scroll_ttl,
        ).extra(
            version='true',
        ).sort(
            {'control_number': {'order': 'asc'}}
        )[(page_ - 1) * size_:page_ * size_]

        if 'set' in kwargs:
            search = search.query('match', **{'_oai.sets': kwargs['set']})

        time_range = {}
        if 'from_' in kwargs:
            time_range['gte'] = kwargs['from_']
        if 'until' in kwargs:
            time_range['lte'] = kwargs['until']
        if time_range:
            search = search.filter('range', **{'_updated': time_range})

        search = search.query('match', **{'relation_version_is_last': 'true'})

        # A hit must either have no ``path`` field (the original note says
        # this clause is what picks up deleted items) or match one of the
        # harvested index paths.
        query_filter = [
            {"bool": {"must_not": {"exists": {"field": "path"}}}}
        ]
        query_filter.extend(
            {"wildcard": {"path": index_path}}
            for index_path in Indexes.get_harverted_index_list()
        )
        search = search.query(
            'bool', **{'must': [{'bool': {'should': query_filter}}]})
        # Keep the returned object, otherwise the exclusion is silently lost.
        search = add_condition_doi_and_future_date(search)
        response = search.execute().to_dict()
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll=scroll_ttl,
        )

    class Pagination(object):
        """Minimal pagination wrapper around a raw search response."""

        page = page_
        per_page = size_

        def __init__(self, response):
            """Initialize pagination from the response dictionary."""
            self.response = response
            total = response['hits']['total']
            # Newer Elasticsearch versions report the total as an object
            # ({'value': N, ...}) instead of a plain integer.
            self.total = total['value'] if isinstance(total, dict) else total
            self._scroll_id = response.get('_scroll_id')

            # Release the scroll context on the last page. Skip the call when
            # the response carried no scroll id, since there is nothing to
            # clear in that case.
            if not self.has_next:
                if self._scroll_id:
                    current_search_client.clear_scroll(
                        scroll_id=self._scroll_id
                    )
                self._scroll_id = None

        @cached_property
        def has_next(self):
            """Return True if there is next page."""
            # Strict comparison: when the pages seen so far cover exactly
            # ``total`` hits there is nothing left to fetch.
            return self.page * self.per_page < self.total

        @cached_property
        def next_num(self):
            """Return next page number, or None on the last page."""
            return self.page + 1 if self.has_next else None

        @property
        def items(self):
            """Yield ``id``/``json``/``updated`` dicts for hits with ``_oai``."""
            for result in self.response['hits']['hits']:
                source = result['_source']
                # Hits without OAI metadata are skipped.
                if '_oai' not in source:
                    continue
                yield {
                    'id': result['_id'],
                    'json': result,
                    # Only the first 19 characters (second precision) of the
                    # timestamp are parsed; any suffix is dropped.
                    'updated': datetime.strptime(
                        source['_updated'][:19],
                        '%Y-%m-%dT%H:%M:%S'
                    ),
                }

    return Pagination(response)
Exemplo n.º 4
0
def get_records(**kwargs):
    """Get records paginated.

    Runs a new scrolled search for the first page, or continues an existing
    scroll when the resumption token carries a ``scroll_id``.

    :param kwargs: Request arguments. The keys read here are
        ``resumptionToken`` (a dict with optional ``page`` and ``scroll_id``),
        ``set``, ``from_`` and ``until``.
    :returns: A ``Pagination`` object exposing ``page``, ``per_page``,
        ``total``, ``has_next``, ``next_num``, ``items`` and ``_scroll_id``.
    :raises OAINoRecordsMatchError: If the search reports zero total hits.
    """
    token = kwargs.get("resumptionToken", {})
    page_ = token.get("page", 1)
    size_ = current_app.config["OAISERVER_PAGE_SIZE"]
    scroll = current_app.config["OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME"]
    scroll_id = token.get("scroll_id")
    # The configured expiry is passed to the search backend as seconds.
    scroll_ttl = "{0}s".format(scroll)

    if scroll_id is None:
        search = current_oaiserver.search_cls(
            index=current_app.config["OAISERVER_RECORD_INDEX"],
        ).params(
            scroll=scroll_ttl,
        ).extra(
            version=True,
        )[(page_ - 1) * size_:page_ * size_]

        if "set" in kwargs:
            search = search.query(
                current_oaiserver.set_records_query_fetcher(kwargs["set"]))

        time_range = {}
        if "from_" in kwargs:
            time_range["gte"] = kwargs["from_"]
        if "until" in kwargs:
            time_range["lte"] = kwargs["until"]
        if time_range:
            search = search.filter(
                "range", **{current_oaiserver.last_update_key: time_range})

        response = search.execute().to_dict()
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll=scroll_ttl,
        )

    class Pagination(object):
        """Minimal pagination wrapper around a raw search response."""

        page = page_
        per_page = size_

        def __init__(self, response):
            """Initialize pagination from the response dictionary."""
            self.response = response
            # From major version 7 on, the total is read from the ``value``
            # key of the ``total`` object.
            hits_total = response["hits"]["total"]
            self.total = (hits_total if ES_VERSION[0] < 7 else
                          hits_total["value"])
            self._scroll_id = response.get("_scroll_id")

            # Release the scroll context on the last page. This runs before
            # the empty-result check so the context is not leaked when the
            # error is raised.
            if not self.has_next:
                if self._scroll_id:
                    current_search_client.clear_scroll(
                        scroll_id=self._scroll_id)
                self._scroll_id = None

            if self.total == 0:
                raise OAINoRecordsMatchError()

        @cached_property
        def has_next(self):
            """Return True if there is next page."""
            # Strict comparison: when the pages seen so far cover exactly
            # ``total`` hits there is nothing left to fetch.
            return self.page * self.per_page < self.total

        @cached_property
        def next_num(self):
            """Return next page number, or None on the last page."""
            return self.page + 1 if self.has_next else None

        @property
        def items(self):
            """Yield one ``id``/``json``/``updated`` dict per hit."""
            from datetime import datetime

            update_key = current_oaiserver.last_update_key
            for result in self.response["hits"]["hits"]:
                yield {
                    "id": result["_id"],
                    "json": result,
                    # Only the first 19 characters (second precision) of the
                    # timestamp are parsed; any suffix is dropped.
                    "updated": datetime.strptime(
                        result["_source"][update_key][:19],
                        "%Y-%m-%dT%H:%M:%S",
                    ),
                }

    return Pagination(response)
Exemplo n.º 5
0
def get_records(**kwargs):
    """Get records.

    Runs a new scrolled search for the first page, or continues an existing
    scroll when the resumption token carries a ``scroll_id``.

    :param kwargs: Request arguments. The keys read here are
        ``resumptionToken`` (a dict with optional ``page`` and ``scroll_id``),
        ``set``, ``from_`` and ``until``.
    :returns: A ``Pagination`` object exposing ``page``, ``per_page``,
        ``total``, ``has_next``, ``next_num``, ``items`` and ``_scroll_id``.
    """
    token = kwargs.get('resumptionToken', {})
    page_ = token.get('page', 1)
    size_ = current_app.config['OAISERVER_PAGE_SIZE']
    scroll = current_app.config['OAISERVER_RESUMPTION_TOKEN_EXPIRE_TIME']
    scroll_id = token.get('scroll_id')
    # The configured expiry is passed to the search backend as seconds.
    scroll_ttl = '{0}s'.format(scroll)

    if scroll_id is None:
        query = Query()

        body = {}
        if 'set' in kwargs:
            body['must'] = [{'match': {'_oai.sets': kwargs['set']}}]

        time_range = {}
        if 'from_' in kwargs:
            time_range['gte'] = kwargs['from_']
        if 'until' in kwargs:
            time_range['lte'] = kwargs['until']
        if time_range:
            body['filter'] = [{'range': {'_oai.updated': time_range}}]

        if body:
            query.body = {'query': {'bool': body}}

        response = current_search_client.search(
            index=current_app.config['OAISERVER_RECORD_INDEX'],
            body=query.body,
            from_=(page_ - 1) * size_,
            size=size_,
            scroll=scroll_ttl,
        )
    else:
        response = current_search_client.scroll(
            scroll_id=scroll_id,
            scroll=scroll_ttl,
        )

    total_ = response['hits']['total']
    # Newer Elasticsearch versions report the total as an object
    # ({'value': N, ...}) instead of a plain integer.
    if isinstance(total_, dict):
        total_ = total_['value']
    # Strict comparison: when the pages seen so far cover exactly ``total_``
    # hits there is nothing left to fetch.
    has_next_ = page_ * size_ < total_

    # Take the scroll id from whichever request was made, so the next page
    # can continue the scroll and the last page can release it.
    scroll_id = response.get('_scroll_id')
    if not has_next_:
        # Clean the descriptor on the last page. ``response`` is left intact
        # because the hits are still read from it below.
        if scroll_id:
            current_search_client.clear_scroll(scroll_id=scroll_id)
        scroll_id = None

    class Pagination(object):
        """Minimal pagination holder built from the search response."""

        # custom property for scrolling
        _scroll_id = scroll_id

        page = page_
        total = total_
        per_page = size_
        has_next = has_next_
        next_num = page + 1 if has_next else None

        @property
        def items(self):
            """Yield one ``id``/``json``/``updated`` dict per hit."""
            from datetime import datetime
            for result in response['hits']['hits']:
                source = result['_source']
                yield {
                    'id': result['_id'],
                    'json': source,
                    'updated': datetime.strptime(
                        source['_oai']['updated'],
                        '%Y-%m-%dT%H:%M:%SZ'
                    ),
                }

    return Pagination()