def remove_expired_data(provider):
    """Remove expired ingest items, and their media files, for a provider.

    Expired items come from ``get_expired_items(provider)``. They are deleted
    from the ``ingest`` resource and announced with
    ``push_expired_notification``; the media referenced by their renditions
    is deleted from ``superdesk.app.media``; finally
    ``remove_expired_from_elastic`` is called.

    :param dict provider: ingest provider; its ``_id`` is only used in log
        messages and falls back to ``'Detached items'`` when missing.
    """
    provider_id = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s', provider_id)
    ingest_service = superdesk.get_resource_service('ingest')

    # Collect ids and media references in a single pass, so the result set
    # does not have to be rewound and iterated a second time.
    ids = []
    file_ids = []
    for item in get_expired_items(provider):
        ids.append(item['_id'])
        if item.get('archived'):
            # media of items flagged as archived is left in place
            continue
        for rend in item.get('renditions', {}).values():
            media_id = rend.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info('Removing items %s', ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s', file_id)
        superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', len(ids))
    logger.info('Removed expired content for provider: {0} count: {1}'
                .format(provider_id, len(ids)))

    remove_expired_from_elastic()
def remove_expired_data(provider):
    """Remove expired items, and their media files, for a provider.

    The resource holding the items is the ``service`` attribute of the
    provider's feeding service when it has one, otherwise ``'ingest'``.
    Expired items are deleted from that resource and announced with
    ``push_expired_notification``; the media referenced by their renditions
    is deleted from ``superdesk.app.media``; finally
    ``remove_expired_from_elastic`` is called for the same resource.

    :param dict provider: ingest provider; its ``_id`` is only used in log
        messages and falls back to ``'Detached items'`` when missing.
    """
    provider_id = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s', provider_id)

    try:
        feeding_service = registered_feeding_services[provider['feeding_service']]
        # work on a fresh instance of the registered service's class
        feeding_service = feeding_service.__class__()
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
    except KeyError:
        # no 'feeding_service' on the provider, or it is not registered
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)

    # Collect ids and media references in a single pass, so the result set
    # does not have to be rewound and iterated a second time.
    ids = []
    file_ids = []
    for item in get_expired_items(provider, ingest_collection):
        ids.append(item['_id'])
        if item.get('archived'):
            # media of items flagged as archived is left in place
            continue
        for rend in item.get('renditions', {}).values():
            media_id = rend.get('media')
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info('Removing items %s', ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s', file_id)
        superdesk.app.media.delete(file_id)

    logger.info('Removed expired content for provider: {0} count: {1}'
                .format(provider_id, len(ids)))

    remove_expired_from_elastic(ingest_collection)
Пример #3
0
def remove_expired_data(provider):
    """Remove expired ingest items, and their media files, for a provider.

    Expired items come from ``get_expired_items(provider)``. They are deleted
    from the ``ingest`` resource and announced with
    ``push_expired_notification``; the media referenced by their renditions
    is deleted from ``superdesk.app.media``; finally
    ``remove_expired_from_elastic`` is called.

    :param dict provider: ingest provider; its ``_id`` is only used in log
        messages and falls back to ``'Detached items'`` when missing.
    """
    provider_id = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s', provider_id)
    ingest_service = superdesk.get_resource_service('ingest')

    # One pass over the expired items gathers both the ids and the media
    # references; no rewind and second iteration of the result set is needed.
    ids = []
    file_ids = []
    for item in get_expired_items(provider):
        ids.append(item['_id'])
        if item.get('archived'):
            # media of items flagged as archived is left in place
            continue
        file_ids.extend(
            rend.get('media')
            for rend in item.get('renditions', {}).values()
            if rend.get('media')
        )

    if ids:
        logger.info('Removing items %s', ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s', file_id)
        superdesk.app.media.delete(file_id)

    stats.incr('ingest.expired_items', len(ids))
    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider_id, len(ids)))

    remove_expired_from_elastic()
Пример #4
0
    def on_deleted(self, doc):
        """Clean up after ``doc`` has been deleted.

        Forwards the event to the package service for composite items,
        removes the item's media files, records a delete activity and pushes
        an expired notification for the item.

        :param dict doc: the deleted item
        """
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        add_activity(ACTIVITY_DELETE, 'removed item {{ type }} about {{ subject }}',
                     self.datasource, item=doc,
                     type=doc[ITEM_TYPE], subject=get_subject(doc))
        # push_expired_notification is given item ids by its other callers,
        # so send the id of the deleted item rather than the whole document.
        push_expired_notification([doc.get('_id')])
Пример #5
0
    def on_deleted(self, doc):
        """Run the post-delete clean-up for ``doc``.

        Clears the item's autosave, forwards the event to the package service
        for composite items, removes media files, records a delete activity,
        pushes an expired notification for the item id and finally calls
        ``app.on_archive_item_deleted``.

        :param dict doc: the deleted item
        """
        autosave = get_component(ItemAutosave)
        autosave.clear(doc['_id'])

        is_package = doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE
        if is_package:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        activity_message = 'removed item {{ type }} about {{ subject }}'
        add_activity(
            ACTIVITY_DELETE,
            activity_message,
            self.datasource,
            item=doc,
            type=doc[ITEM_TYPE],
            subject=get_subject(doc),
        )

        expired_ids = [doc.get(config.ID_FIELD)]
        push_expired_notification(expired_ids)
        app.on_archive_item_deleted(doc)
Пример #6
0
    def on_deleted(self, doc):
        """Handle the deletion of ``doc``.

        In order: drop the autosaved copy, let the package service react when
        the item is a composite, remove media files, log a delete activity,
        push an expired notification carrying the item id and call
        ``app.on_archive_item_deleted``.

        :param dict doc: the deleted item
        """
        get_component(ItemAutosave).clear(doc['_id'])

        if CONTENT_TYPE.COMPOSITE == doc[ITEM_TYPE]:
            # composite items are also reported to the package service
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        template = 'removed item {{ type }} about {{ subject }}'
        add_activity(ACTIVITY_DELETE, template, self.datasource,
                     item=doc, type=doc[ITEM_TYPE], subject=get_subject(doc))

        deleted_id = doc.get(config.ID_FIELD)
        push_expired_notification([deleted_id])
        app.on_archive_item_deleted(doc)
Пример #7
0
    def on_deleted(self, doc):
        """Clean up after ``doc`` has been deleted.

        Forwards the event to the package service for composite items,
        removes the item's media files, records a delete activity and pushes
        an expired notification for the item.

        :param dict doc: the deleted item
        """
        if doc[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            self.packageService.on_deleted(doc)

        remove_media_files(doc)

        add_activity(ACTIVITY_DELETE,
                     'removed item {{ type }} about {{ subject }}',
                     self.datasource,
                     item=doc,
                     type=doc[ITEM_TYPE],
                     subject=get_subject(doc))
        # push_expired_notification is given item ids by its other callers,
        # so send the id of the deleted item rather than the whole document.
        push_expired_notification([doc.get('_id')])
Пример #8
0
def remove_expired_data(provider):
    """Remove expired items, and their media files, for a provider.

    The resource holding the items is the ``service`` attribute of the
    provider's feeding service when it has one, otherwise ``'ingest'``.
    Expired items are deleted from that resource and announced with
    ``push_expired_notification``; the media referenced by their renditions
    is deleted from ``superdesk.app.media``; finally
    ``remove_expired_from_elastic`` is called for the same resource.

    :param dict provider: ingest provider; its ``_id`` is only used in log
        messages and falls back to ``'Detached items'`` when missing.
    """
    provider_id = provider.get('_id', 'Detached items')
    logger.info('Removing expired content for provider: %s', provider_id)

    try:
        feeding_service = registered_feeding_services[
            provider['feeding_service']]
        # work on a fresh instance of the registered service's class
        feeding_service = feeding_service.__class__()
        ingest_collection = getattr(feeding_service, 'service', 'ingest')
    except KeyError:
        # no 'feeding_service' on the provider, or it is not registered
        ingest_collection = 'ingest'

    ingest_service = superdesk.get_resource_service(ingest_collection)

    # One pass over the expired items gathers both the ids and the media
    # references; no rewind and second iteration of the result set is needed.
    ids = []
    file_ids = []
    for item in get_expired_items(provider, ingest_collection):
        ids.append(item['_id'])
        if item.get('archived'):
            # media of items flagged as archived is left in place
            continue
        file_ids.extend(
            rend.get('media')
            for rend in item.get('renditions', {}).values()
            if rend.get('media')
        )

    if ids:
        logger.info('Removing items %s', ids)
        ingest_service.delete({'_id': {'$in': ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info('Deleting file: %s', file_id)
        superdesk.app.media.delete(file_id)

    logger.info('Removed expired content for provider: {0} count: {1}'.format(
        provider_id, len(ids)))

    remove_expired_from_elastic(ingest_collection)
Пример #9
0
def remove_expired_data(provider):
    """Remove expired items, and their media files, for a provider.

    The resource holding the items is the ``service`` attribute of the
    provider's feeding service when it has one, otherwise ``"ingest"``.
    Expired items are deleted from that resource and announced with
    ``push_expired_notification``; the media referenced by their renditions
    is deleted from ``superdesk.app.media``; finally
    ``remove_expired_from_elastic`` is called for the same resource.

    :param dict provider: ingest provider; its ``_id`` is only used in log
        messages and falls back to ``"Detached items"`` when missing.
    """
    provider_id = provider.get("_id", "Detached items")
    logger.info("Removing expired content for provider: %s", provider_id)

    try:
        feeding_service = get_feeding_service(provider["feeding_service"])
        ingest_collection = getattr(feeding_service, "service", "ingest")
    except KeyError:
        # KeyError while resolving the feeding service (for instance the
        # provider has no "feeding_service" key): use the default resource
        ingest_collection = "ingest"

    ingest_service = superdesk.get_resource_service(ingest_collection)

    # Collect ids and media references in a single pass, so the result set
    # does not have to be rewound and iterated a second time.
    ids = []
    file_ids = []
    for item in get_expired_items(provider, ingest_collection):
        ids.append(item["_id"])
        if item.get("archived"):
            # media of items flagged as archived is left in place
            continue
        for rend in item.get("renditions", {}).values():
            media_id = rend.get("media")
            if media_id:
                file_ids.append(media_id)

    if ids:
        logger.info("Removing items %s", ids)
        ingest_service.delete({"_id": {"$in": ids}})
        push_expired_notification(ids)

    for file_id in file_ids:
        logger.info("Deleting file: %s", file_id)
        superdesk.app.media.delete(file_id)

    logger.info("Removed expired content for provider: {0} count: {1}".format(
        provider_id, len(ids)))

    remove_expired_from_elastic(ingest_collection)
Пример #10
0
    def _remove_expired_items(self, expiry_datetime):
        """Archive or delete the archive items returned for ``expiry_datetime``.

        Steps, in order:

        * fetch the expired items and hand them to ``delete_spiked_items``;
        * killed items not reported by
          ``check_if_items_imported_to_legal_archive`` are deleted from
          ``published`` and then from the archive;
        * other non-spiked items accepted by ``_can_remove_item`` and not
          reported by the legal archive check go to ``_move_to_archived``;
        * items reported by the legal archive check are not removed; their
          ``expiry_status`` is set to ``'invalid'``.

        Every log line is prefixed with ``self.log_msg``.

        :param datetime expiry_datetime: expiry datetime
        """
        logger.info('{} Starting to remove published expired items.'.format(
            self.log_msg))
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        items_to_remove = set()
        items_to_be_archived = dict()
        items_having_issues = dict()

        expired_items = list(
            archive_service.get_expired_items(expiry_datetime))
        if not expired_items:
            logger.info('{} No items found to expire.'.format(self.log_msg))
            return

        # delete spiked items
        self.delete_spiked_items(expired_items)

        # get killed items
        killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items
            if item.get(ITEM_STATE) == CONTENT_STATE.KILLED
        }

        # check if killed items imported to legal
        items_having_issues.update(
            self.check_if_items_imported_to_legal_archive(killed_items))

        # filter out the killed items not imported to legal.
        killed_items = {
            item_id: item
            for item_id, item in killed_items.items()
            if item_id not in items_having_issues
        }

        # Get the not killed and spiked items
        not_killed_items = {
            item.get(config.ID_FIELD): item
            for item in expired_items if item.get(ITEM_STATE) not in
            {CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED}
        }

        log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."
        log_defaults = {
            config.VERSION: 1,
            'expiry': expiry_datetime,
            'unique_name': '',
        }

        def describe(item):
            # Only the items in the loop below get the setdefault() fallbacks;
            # killed and problematic items may lack these keys, so format
            # them with the same defaults, without mutating the item.
            return log_msg_format.format(**dict(log_defaults, **item))

        # Processing items to expire
        for item_id, item in not_killed_items.items():
            item.setdefault(config.VERSION, 1)
            item.setdefault('expiry', expiry_datetime)
            item.setdefault('unique_name', '')
            expiry_msg = log_msg_format.format(**item)
            logger.info('{} Processing expired item. {}'.format(
                self.log_msg, expiry_msg))

            processed_items = dict()
            if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                    self._can_remove_item(item, processed_items):
                # item can be archived and removed from the database
                logger.info('{} Removing item. {}'.format(
                    self.log_msg, expiry_msg))
                logger.info('{} Items to be removed. {}'.format(
                    self.log_msg, processed_items))
                issues = self.check_if_items_imported_to_legal_archive(
                    processed_items)
                if issues:
                    items_having_issues.update(processed_items)
                else:
                    items_to_be_archived.update(processed_items)

        # all items to expire
        items_to_expire = deepcopy(items_to_be_archived)

        # check once again in items imported to legal
        items_having_issues.update(
            self.check_if_items_imported_to_legal_archive(items_to_expire))
        if items_having_issues:
            # remove items not imported to legal
            items_to_expire = {
                item_id: item
                for item_id, item in items_to_expire.items()
                if item_id not in items_having_issues
            }

            # remove items not imported to legal from archived items
            items_to_be_archived = {
                item_id: item
                for item_id, item in items_to_be_archived.items()
                if item_id not in items_having_issues
            }

            # items_to_be_archived might contain killed items
            for item_id, item in items_to_be_archived.items():
                if item.get(ITEM_STATE) == CONTENT_STATE.KILLED:
                    killed_items[item_id] = item

            # remove killed items from the items_to_be_archived
            items_to_be_archived = {
                item_id: item
                for item_id, item in items_to_be_archived.items()
                if item.get(ITEM_STATE) != CONTENT_STATE.KILLED
            }

        # add killed items to items to expire
        items_to_expire.update(killed_items)

        # get the filter conditions
        logger.info('{} filter conditions.'.format(self.log_msg))
        req = ParsedRequest()
        filter_conditions = list(
            get_resource_service('content_filters').get(
                req=req, lookup={'is_archived_filter': True}))

        # move to archived collection
        logger.info('{} Archiving items.'.format(self.log_msg))
        for item in items_to_be_archived.values():
            self._move_to_archived(item, filter_conditions)

        for item_id, item in killed_items.items():
            # delete from the published collection and queue
            msg = describe(item)
            try:
                published_service.delete_by_article_id(item_id)
                logger.info(
                    '{} Deleting killed item from published. {}'.format(
                        self.log_msg, msg))
                items_to_remove.add(item_id)
            except Exception:
                # best effort: log the failure and carry on with the next
                # item, without swallowing KeyboardInterrupt/SystemExit
                logger.exception(
                    '{} Failed to delete killed item from published. {}'.
                    format(self.log_msg, msg))

        if items_to_remove:
            logger.info('{} Deleting articles.: {}'.format(
                self.log_msg, items_to_remove))
            archive_service.delete_by_article_ids(list(items_to_remove))

        push_expired_notification(items_to_expire)

        for item in items_having_issues.values():
            msg = describe(item)
            try:
                archive_service.system_update(item.get(config.ID_FIELD),
                                              {'expiry_status': 'invalid'},
                                              item)
                logger.info('{} Setting item expiry status. {}'.format(
                    self.log_msg, msg))
            except Exception:
                # best effort, see above
                logger.exception(
                    '{} Failed to set expiry status for item. {}'.format(
                        self.log_msg, msg))

        logger.info('{} Deleting killed from archive.'.format(self.log_msg))
Пример #11
0
    def _remove_expired_items(self, expiry_datetime):
        """Archive or delete the archive items returned for ``expiry_datetime``.

        Steps, in order:

        * fetch the expired items and hand them to ``delete_spiked_items``;
        * killed items not reported by
          ``check_if_items_imported_to_legal_archive`` are deleted from
          ``published`` and then from the archive;
        * other non-spiked items accepted by ``_can_remove_item`` and not
          reported by the legal archive check go to ``_move_to_archived``;
        * items reported by the legal archive check are not removed; their
          ``expiry_status`` is set to ``'invalid'``.

        Every log line is prefixed with ``self.log_msg``.

        :param datetime expiry_datetime: expiry datetime
        """
        logger.info('{} Starting to remove published expired items.'.format(self.log_msg))
        archive_service = get_resource_service(ARCHIVE)
        published_service = get_resource_service('published')
        items_to_remove = set()
        items_to_be_archived = dict()
        items_having_issues = dict()

        expired_items = list(archive_service.get_expired_items(expiry_datetime))
        if not expired_items:
            logger.info('{} No items found to expire.'.format(self.log_msg))
            return

        # delete spiked items
        self.delete_spiked_items(expired_items)

        # get killed items
        killed_items = {item.get(config.ID_FIELD): item
                        for item in expired_items if item.get(ITEM_STATE) == CONTENT_STATE.KILLED}

        # check if killed items imported to legal
        items_having_issues.update(self.check_if_items_imported_to_legal_archive(killed_items))

        # filter out the killed items not imported to legal.
        killed_items = {item_id: item for item_id, item in killed_items.items()
                        if item_id not in items_having_issues}

        # Get the not killed and spiked items
        not_killed_items = {item.get(config.ID_FIELD): item for item in expired_items
                            if item.get(ITEM_STATE) not in {CONTENT_STATE.KILLED, CONTENT_STATE.SPIKED}}

        log_msg_format = "{{'_id': {_id}, 'unique_name': {unique_name}, 'version': {_current_version}, " \
                         "'expired_on': {expiry}}}."
        log_defaults = {config.VERSION: 1, 'expiry': expiry_datetime, 'unique_name': ''}

        def describe(item):
            # Only the items in the loop below get the setdefault() fallbacks;
            # killed and problematic items may lack these keys, so format
            # them with the same defaults, without mutating the item.
            return log_msg_format.format(**dict(log_defaults, **item))

        # Processing items to expire
        for item_id, item in not_killed_items.items():
            item.setdefault(config.VERSION, 1)
            item.setdefault('expiry', expiry_datetime)
            item.setdefault('unique_name', '')
            expiry_msg = log_msg_format.format(**item)
            logger.info('{} Processing expired item. {}'.format(self.log_msg, expiry_msg))

            processed_items = dict()
            if item_id not in items_to_be_archived and item_id not in items_having_issues and \
                    self._can_remove_item(item, processed_items):
                # item can be archived and removed from the database
                logger.info('{} Removing item. {}'.format(self.log_msg, expiry_msg))
                logger.info('{} Items to be removed. {}'.format(self.log_msg, processed_items))
                issues = self.check_if_items_imported_to_legal_archive(processed_items)
                if issues:
                    items_having_issues.update(processed_items)
                else:
                    items_to_be_archived.update(processed_items)

        # all items to expire
        items_to_expire = deepcopy(items_to_be_archived)

        # check once again in items imported to legal
        items_having_issues.update(self.check_if_items_imported_to_legal_archive(items_to_expire))
        if items_having_issues:
            # remove items not imported to legal
            items_to_expire = {item_id: item for item_id, item in items_to_expire.items()
                               if item_id not in items_having_issues}

            # remove items not imported to legal from archived items
            items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                    if item_id not in items_having_issues}

            # items_to_be_archived might contain killed items
            for item_id, item in items_to_be_archived.items():
                if item.get(ITEM_STATE) == CONTENT_STATE.KILLED:
                    killed_items[item_id] = item

            # remove killed items from the items_to_be_archived
            items_to_be_archived = {item_id: item for item_id, item in items_to_be_archived.items()
                                    if item.get(ITEM_STATE) != CONTENT_STATE.KILLED}

        # add killed items to items to expire
        items_to_expire.update(killed_items)

        # get the filter conditions
        logger.info('{} filter conditions.'.format(self.log_msg))
        req = ParsedRequest()
        filter_conditions = list(get_resource_service('content_filters').get(req=req,
                                                                             lookup={'is_archived_filter': True}))

        # move to archived collection
        logger.info('{} Archiving items.'.format(self.log_msg))
        for item in items_to_be_archived.values():
            self._move_to_archived(item, filter_conditions)

        for item_id, item in killed_items.items():
            # delete from the published collection and queue
            msg = describe(item)
            try:
                published_service.delete_by_article_id(item_id)
                logger.info('{} Deleting killed item from published. {}'.format(self.log_msg, msg))
                items_to_remove.add(item_id)
            except Exception:
                # best effort: log the failure and carry on with the next
                # item, without swallowing KeyboardInterrupt/SystemExit
                logger.exception('{} Failed to delete killed item from published. {}'.format(self.log_msg, msg))

        if items_to_remove:
            logger.info('{} Deleting articles.: {}'.format(self.log_msg, items_to_remove))
            archive_service.delete_by_article_ids(list(items_to_remove))

        push_expired_notification(items_to_expire)

        for item in items_having_issues.values():
            msg = describe(item)
            try:
                archive_service.system_update(item.get(config.ID_FIELD), {'expiry_status': 'invalid'}, item)
                logger.info('{} Setting item expiry status. {}'.format(self.log_msg, msg))
            except Exception:
                # best effort, see above
                logger.exception('{} Failed to set expiry status for item. {}'.format(self.log_msg, msg))

        logger.info('{} Deleting killed from archive.'.format(self.log_msg))