示例#1
0
    def test_query_getting_overdue_scheduled_content(self):
        """Expect exactly one overdue scheduled item among five archive fixtures.

        The fixtures mix states ('published', 'scheduled', 'spiked') with
        publish_schedule values before, at and after the current time, plus
        one document that has no publish_schedule at all.
        """
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'published'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(-10), 'state': 'scheduled'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(0), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'publish_schedule': get_expiry_date(10), 'state': 'scheduled'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        now = date_to_str(utcnow())
        overdue_items = get_overdue_scheduled_items(now, 'archive')
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(1, overdue_items.count())
示例#2
0
 def test_get_expiry_date(self):
     """Check get_expiry_date(minutes=60) against utcnow() + 60 minutes, field by field."""
     self.assertIsInstance(get_expiry_date(minutes=60), datetime)
     expected = utcnow() + timedelta(minutes=60)
     actual = get_expiry_date(minutes=60)
     # Only the fields down to whole seconds are compared.
     for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
         self.assertEqual(getattr(expected, field), getattr(actual, field))
    def __set_published_item_expiry(self, doc):
        """Set ``doc['expiry']`` for a published item.

        The expiry length comes from the item's desk ('published_item_expiry')
        when available, otherwise from config.PUBLISHED_ITEMS_EXPIRY_MINUTES.
        When the document carries a truthy EMBARGO value, that value is passed
        to get_expiry_date as the offset.

        :param dict doc: item being published; modified in place.
        """
        desk_id = doc.get('task', {}).get('desk', None)
        desk = {}

        if desk_id:
            # Presumably find_one returns None when no desk matches (TODO confirm);
            # keep the empty-dict fallback so the config default still applies.
            desk = get_resource_service('desks').find_one(req=None, _id=desk_id) or {}

        expiry_minutes = desk.get('published_item_expiry', config.PUBLISHED_ITEMS_EXPIRY_MINUTES)
        embargo = doc.get(EMBARGO)
        if embargo:
            doc['expiry'] = get_expiry_date(expiry_minutes, offset=embargo)
        else:
            doc['expiry'] = get_expiry_date(expiry_minutes)
 def setUp(self):
     """Seed 'archive' with documents of varying expiry, then initialise the app."""
     super().setUp()
     with self.app.app_context():
         # One document per expiry value, inserted individually.
         for minutes in (-10, 0, 10, 20, 30):
             self.app.data.insert('archive', [{'expiry': get_expiry_date(minutes)}])
         # Edge cases: explicit null expiry, and no expiry field at all.
         self.app.data.insert('archive', [{'expiry': None}])
         self.app.data.insert('archive', [{'unique_id': 97}])
         init_app(self.app)
示例#5
0
    def test_query_getting_overdue_scheduled_content(self):
        """Expect exactly one overdue scheduled item among five archive fixtures.

        Runs inside the application context. The fixtures mix states with
        publish_schedule values before, at and after the current time, plus
        one document without a publish_schedule.
        """
        with self.app.app_context():
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "published"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(-10), "state": "scheduled"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(0), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"publish_schedule": get_expiry_date(10), "state": "scheduled"}])
            self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

            now = date_to_str(utcnow())
            overdue_items = get_overdue_scheduled_items(now, "archive")
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(1, overdue_items.count())
示例#6
0
    def test_query_getting_expired_content(self):
        """Expect the archive service to report exactly one of seven spiked fixtures as expired."""
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(-10), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(0), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(10), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(20), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(30), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': None, 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        now = utcnow()
        expired_items = get_resource_service(ARCHIVE).get_expired_items(now)
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(1, expired_items.count())
示例#7
0
    def test_query_getting_expired_content(self):
        """Expect RemoveExpiredSpikeContent to report two of seven spiked fixtures as expired."""
        with self.app.app_context():
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(-10), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(0), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(10), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(20), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": get_expiry_date(30), "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"expiry": None, "state": "spiked"}])
            self.app.data.insert(ARCHIVE, [{"unique_id": 97, "state": "spiked"}])

            now = date_to_str(utcnow())
            expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(2, expired_items.count())
    def test_query_getting_expired_content(self):
        """Expect RemoveExpiredSpikeContent to report two of seven spiked fixtures as expired."""
        with self.app.app_context():
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(-10), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(0), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(10), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(20), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(30), 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'expiry': None, 'state': 'spiked'}])
            self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

            now = date_to_str(utcnow())
            expired_items = RemoveExpiredSpikeContent().get_expired_items(now)
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(2, expired_items.count())
示例#9
0
    def on_update(self, updates, original):
        """Pre-update hook for a stage.

        A content_expiry of 0 is replaced by the CONTENT_EXPIRY_MINUTES
        setting. When the update carries a truthy content_expiry, the expiry
        of every document returned by get_stage_documents is recomputed from
        its 'versioncreated'. The working-stage and default-incoming flags may
        be switched on (replacing the previous default), but switching them
        off on the current holder raises a forbidden error.
        """
        if updates.get('content_expiry') == 0:
            updates['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']

        super().on_update(updates, original)

        if updates.get('content_expiry', None):
            stage_id = str(original[config.ID_FIELD])
            for stage_doc in self.get_stage_documents(stage_id):
                new_expiry = get_expiry_date(updates['content_expiry'], stage_doc['versioncreated'])
                get_model(ItemModel).update({'_id': stage_doc[config.ID_FIELD]}, {'expiry': new_expiry})

        if updates.get('working_stage', False):
            # Newly flagged as working stage: take over from the old one.
            if not original.get('working_stage'):
                self.remove_old_default(original.get('desk'), 'working_stage')
                self.set_desk_ref(original, 'working_stage')
        elif original.get('working_stage') and 'working_stage' in updates:
            raise SuperdeskApiError.forbiddenError(message='Must have one working stage in a desk')

        if updates.get('default_incoming', False):
            # Newly flagged as default incoming stage: take over from the old one.
            if not original.get('default_incoming'):
                self.remove_old_default(original.get('desk'), 'default_incoming')
                self.set_desk_ref(original, 'incoming_stage')
        elif original.get('default_incoming') and 'default_incoming' in updates:
            raise SuperdeskApiError.forbiddenError(message='Must have one incoming stage in a desk')
示例#10
0
    def update(self, id, updates, original):
        """Spike the item with the given id.

        Raises InvalidStateTransitionError when the 'spike' transition is not
        valid from the original state. Otherwise sets the expiry and revert
        state on ``updates``, clears 'rewrite_of' if the original had one,
        persists through the backend, pushes an 'item:spike' notification and
        removes references to the item from packages.

        :return: the item returned by the backend update.
        """
        if not is_workflow_state_transition_valid("spike", original[config.CONTENT_STATE]):
            raise InvalidStateTransitionError()

        package_service = PackageService()
        user = get_user(required=True)
        archived = get_resource_service(ARCHIVE).find_one(req=None, _id=id)

        # Global default; a desk-level 'spike_expiry' takes precedence when the item sits on a desk.
        spike_minutes = app.settings["SPIKE_EXPIRY_MINUTES"]
        if is_assigned_to_a_desk(archived):
            owning_desk = get_resource_service("desks").find_one(_id=archived["task"]["desk"], req=None)
            spike_minutes = owning_desk.get("spike_expiry", spike_minutes)

        updates[EXPIRY] = get_expiry_date(spike_minutes)
        updates[REVERT_STATE] = archived.get(app.config["CONTENT_STATE"], None)

        if original.get("rewrite_of"):
            updates["rewrite_of"] = None

        spiked = self.backend.update(self.datasource, id, updates, original)
        push_notification("item:spike", item=str(spiked.get("_id")), user=str(user))
        package_service.remove_spiked_refs_from_package(id)
        return spiked
示例#11
0
    def test_query_removing_media_files_keeps(self):
        """remove_media_files must report False while other collections hold the same renditions."""
        with self.app.app_context():
            self.app.data.insert(ARCHIVE, [{
                'state': 'spiked',
                'expiry': get_expiry_date(-10),
                'type': 'picture',
                'renditions': self.media
            }])

            # The same renditions are stored in four further collections.
            other_holders = (
                ('ingest', {}),
                ('archive_versions', {}),
                ('legal_archive', {'_id': 1}),
                ('legal_archive_versions', {'_id': 1}),
            )
            for resource, extra in other_holders:
                self.app.data.insert(resource, [dict(extra, type='picture', renditions=self.media)])

            archive_items = self.app.data.find_all('archive', None)
            self.assertEqual(archive_items.count(), 1)
            self.assertFalse(remove_media_files(archive_items[0]))
示例#12
0
    def test_remove_renditions_from_all_versions(self):
        """Deleting an article must delete media referenced only by its stored version."""
        renditions = copy.copy(self.media)

        # The archive item itself has no renditions; only its version does.
        spiked_picture = {
            "state": "spiked",
            "expiry": get_expiry_date(-10),
            "type": "picture",
            "renditions": {},
        }
        ids = self.app.data.insert(ARCHIVE, [spiked_picture])

        picture_version = {
            "_id_document": ids[0],
            "type": "picture",
            "renditions": renditions,
        }
        self.app.data.insert("archive_versions", [picture_version])

        with patch.object(self.app.media, "delete") as media_delete:
            get_resource_service("archive").delete_by_article_ids(ids)
            for rendition in renditions.values():
                media_delete.assert_any_call(rendition["media"])
    def update(self, id, updates, original):
        """Spike the item with the given id.

        Raises InvalidStateTransitionError when the 'spike' transition is not
        valid from the original state. Otherwise sets the expiry and revert
        state on ``updates``, clears 'rewrite_of' if the original had one,
        persists through the backend, pushes an 'item:spike' notification and
        removes references to the item from packages.

        :return: the item returned by the backend update.
        """
        if not is_workflow_state_transition_valid('spike', original[ITEM_STATE]):
            raise InvalidStateTransitionError()

        package_service = PackageService()
        user = get_user(required=True)
        archived = get_resource_service(ARCHIVE).find_one(req=None, _id=id)

        # Global default; a desk-level 'spike_expiry' takes precedence when the item sits on a desk.
        spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
        if is_assigned_to_a_desk(archived):
            owning_desk = get_resource_service('desks').find_one(_id=archived['task']['desk'], req=None)
            spike_minutes = owning_desk.get('spike_expiry', spike_minutes)

        updates[EXPIRY] = get_expiry_date(spike_minutes)
        updates[REVERT_STATE] = archived.get(ITEM_STATE, None)

        if original.get('rewrite_of'):
            updates['rewrite_of'] = None

        spiked = self.backend.update(self.datasource, id, updates, original)
        push_notification('item:spike', item=str(spiked.get('_id')), user=str(user))
        package_service.remove_spiked_refs_from_package(id)
        return spiked
示例#14
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare one item from an ingest provider and persist it in the 'ingest' resource.

    The item is mutated in place: it gets an id (unless already present), a
    family id, provider reference, default source, default state and an
    expiry. Category/subject processing and the rule set are then applied,
    renditions are updated, and the item is either written over an existing
    item with the same guid or posted as new. If a routing scheme is given it
    is applied to the stored item.

    :param dict item: the ingested item.
    :param dict provider: the ingest provider the item came from.
    :param rule_set: optional rule set passed to apply_rule_set.
    :param routing_scheme: optional routing scheme applied after persisting.
    :return bool: True on success, False if any exception was raised (it is
        logged and reported to sentry on a best-effort basis).
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            # Prefer the 'baseImage' rendition; otherwise fall back to the first one available.
            base_image_rend = rend.get('baseImage') or next(iter(rend.values()))
            if base_image_rend:
                href = providers[provider.get('type')].prepare_href(base_image_rend['href'])
                update_renditions(item, href, old_item)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException:
                # logger.exception records the active exception; the previous
                # logger.error(msg, e) passed an argument without a placeholder.
                logger.exception("Exception while persisting item in ingest collection")

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Sentry reporting is best-effort; do not mask the original failure,
            # but let SystemExit/KeyboardInterrupt propagate.
            pass
        return False
    return True
示例#15
0
    def test_query_getting_expired_content(self):
        """Every batch from get_expired_items must hold only the fixture tagged 'expired'.

        That fixture is the one whose expiry lies 10 minutes before ``now``.
        """
        now = utcnow()

        self.app.data.insert(ARCHIVE, [{'expiry': now - timedelta(minutes=10), 'state': 'spiked',
                                        'unique_id': 'expired'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(0), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(10), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(20), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': get_expiry_date(30), 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'expiry': None, 'state': 'spiked'}])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        # NOTE(review): the result of this first call is never inspected; kept to preserve behaviour.
        expired_items = get_resource_service(ARCHIVE).get_expired_items(now)
        now = utcnow()
        # NOTE(review): if nothing is yielded, no assertion runs and the test passes vacuously.
        for batch in get_resource_service(ARCHIVE).get_expired_items(now):
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(1, len(batch))
            self.assertEqual('expired', batch[0]['unique_id'])
示例#16
0
    def __set_published_item_expiry(self, doc):
        """Set ``doc["expiry"]`` for a published item.

        The expiry length comes from the item's desk ("published_item_expiry")
        when available, otherwise from config.PUBLISHED_ITEMS_EXPIRY_MINUTES.

        :param dict doc: item being published; modified in place.
        """
        desk_id = doc.get("task", {}).get("desk", None)
        desk = {}

        if desk_id:
            # Presumably find_one returns None when no desk matches (TODO confirm);
            # keep the empty-dict fallback so the config default still applies.
            desk = get_resource_service("desks").find_one(req=None, _id=desk_id) or {}

        expiry_minutes = desk.get("published_item_expiry", config.PUBLISHED_ITEMS_EXPIRY_MINUTES)
        doc["expiry"] = get_expiry_date(expiry_minutes)
示例#17
0
    def test_query_getting_expired_content(self):
        """Every batch from get_expired_items must hold only the fixture with unique_id 100.

        That fixture is the one whose expiry lies 10 minutes before ``now``.
        """
        now = utcnow()

        self.app.data.insert(ARCHIVE, [
            {'expiry': get_expiry_date(0), 'state': 'spiked'},
            {'expiry': get_expiry_date(10), 'state': 'spiked'},
            {'expiry': get_expiry_date(20), 'state': 'spiked'},
            {'expiry': get_expiry_date(30), 'state': 'spiked'},
            {'expiry': None, 'state': 'spiked'},
            {'unique_id': 97, 'state': 'spiked'},
            {'expiry': now - timedelta(minutes=10), 'state': 'spiked', 'unique_id': 100},
        ])

        # NOTE(review): the result of this first call is never inspected; kept to preserve behaviour.
        expired_items = get_resource_service(ARCHIVE).get_expired_items(now)
        now = utcnow()
        # NOTE(review): if nothing is yielded, no assertion runs and the test passes vacuously.
        for batch in get_resource_service(ARCHIVE).get_expired_items(now):
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(1, len(batch))
            self.assertEqual(100, batch[0]['unique_id'])
示例#18
0
    def test_query_getting_expired_content(self):
        """Every batch from get_expired_items must hold only the fixture with unique_id 100.

        That fixture is the one whose expiry lies 10 minutes before ``now``.
        """
        now = utcnow()

        self.app.data.insert(
            ARCHIVE,
            [
                {"expiry": get_expiry_date(0), "state": "spiked"},
                {"expiry": get_expiry_date(10), "state": "spiked"},
                {"expiry": get_expiry_date(20), "state": "spiked"},
                {"expiry": get_expiry_date(30), "state": "spiked"},
                {"expiry": None, "state": "spiked"},
                {"unique_id": 97, "state": "spiked"},
                {"expiry": now - timedelta(minutes=10), "state": "spiked", "unique_id": 100},
            ],
        )

        # NOTE(review): the result of this first call is never inspected; kept to preserve behaviour.
        expired_items = get_resource_service(ARCHIVE).get_expired_items(now)
        now = utcnow()
        # NOTE(review): if nothing is yielded, no assertion runs and the test passes vacuously.
        for batch in get_resource_service(ARCHIVE).get_expired_items(now):
            # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
            self.assertEqual(1, len(batch))
            self.assertEqual(100, batch[0]["unique_id"])
示例#19
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Prepare one item from an ingest provider and persist it in the "ingest" resource.

    The item is mutated in place: it gets an id (unless already present), a
    family id, provider reference, default source, default state and an
    expiry. Category/subject processing and the rule set are then applied,
    renditions are updated, and the item is either written over an existing
    item with the same guid or posted as new. If a routing scheme is given it
    is applied to the stored item.

    :param dict item: the ingested item.
    :param dict provider: the ingest provider the item came from.
    :param rule_set: optional rule set passed to apply_rule_set.
    :param routing_scheme: optional routing scheme applied after persisting.
    :return bool: True on success, False if any exception was raised (it is
        logged and reported to sentry on a best-effort basis).
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get("type")].provider = provider

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        set_default_state(item, STATE_INGESTED)
        item["expiry"] = get_expiry_date(
            provider.get("content_expiry", INGEST_EXPIRY_MINUTES), item.get("versioncreated")
        )

        if "anpa-category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            process_iptc_codes(item, provider)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service("ingest")

        if item.get("ingest_provider_sequence") is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        rend = item.get("renditions", {})
        if rend:
            # Prefer the "baseImage" rendition; otherwise fall back to the first one available.
            base_image_rend = rend.get("baseImage") or next(iter(rend.values()))
            if base_image_rend:
                href = providers[provider.get("type")].prepare_href(base_image_rend["href"])
                update_renditions(item, href)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException:
                # logger.exception records the active exception; the previous
                # logger.error(msg, e) passed an argument without a placeholder.
                logger.exception("Exception while persisting item in ingest collection")

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service("routing_schemes").apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # Sentry reporting is best-effort; do not mask the original failure,
            # but let SystemExit/KeyboardInterrupt propagate.
            pass
        return False
    return True
示例#20
0
 def test_get_expiry_date_with_offset(self):
     """get_expiry_date(minutes=5, offset=X) must match X + 5 minutes, field by field."""
     offset = utcnow() + timedelta(minutes=10)
     expected = offset + timedelta(minutes=5)
     actual = get_expiry_date(minutes=5, offset=offset)
     # Only the fields down to whole seconds are compared.
     for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
         self.assertEqual(getattr(expected, field), getattr(actual, field))
示例#21
0
 def on_update(self, updates, original):
     """Pre-update hook: normalise content_expiry and re-expire the stage's documents.

     A content_expiry of 0 is replaced by the CONTENT_EXPIRY_MINUTES setting.
     When the update carries a truthy content_expiry, the expiry of every
     document returned by get_stage_documents is recomputed from its
     'versioncreated'.
     """
     if updates.get('content_expiry') == 0:
         updates['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
     super().on_update(updates, original)
     if not updates.get('content_expiry', None):
         return
     for stage_doc in self.get_stage_documents(str(original['_id'])):
         new_expiry = get_expiry_date(updates['content_expiry'], stage_doc['versioncreated'])
         get_model(ItemModel).update({'_id': stage_doc['_id']}, {'expiry': new_expiry})
示例#22
0
 def test_get_expiry_date_with_offset(self):
     """get_expiry_date(minutes=5, offset=X) must match X + 5 minutes, field by field."""
     offset = utcnow() + timedelta(minutes=10)
     expected = offset + timedelta(minutes=5)
     actual = get_expiry_date(minutes=5, offset=offset)
     # Only the fields down to whole seconds are compared.
     for field in ('year', 'month', 'day', 'hour', 'minute', 'second'):
         self.assertEqual(getattr(expected, field), getattr(actual, field))
示例#23
0
 def on_update(self, updates, original):
     """Pre-update hook: normalise content_expiry and re-expire the stage's documents.

     A content_expiry of 0 is replaced by the CONTENT_EXPIRY_MINUTES setting.
     When the update carries a truthy content_expiry, the expiry of every
     document returned by get_stage_documents is recomputed from its
     'versioncreated'.
     """
     if updates.get('content_expiry') == 0:
         updates['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
     super().on_update(updates, original)
     if not updates.get('content_expiry', None):
         return
     for stage_doc in self.get_stage_documents(str(original['_id'])):
         new_expiry = get_expiry_date(updates['content_expiry'], stage_doc['versioncreated'])
         get_model(ItemModel).update({'_id': stage_doc['_id']}, {'expiry': new_expiry})
示例#24
0
 def _get_spike_expiry(self, desk_id, stage_id):
     """
     Work out the expiry to apply to a spiked item.

     A configured SPIKE_EXPIRY_MINUTES setting wins; when that setting is
     None the desk/stage expiry from get_expiry is used instead.
     :param desk_id: id of the desk, forwarded to get_expiry
     :param stage_id: id of the stage, forwarded to get_expiry
     :return: the result of get_expiry or get_expiry_date
     """
     spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
     if spike_minutes is not None:
         return get_expiry_date(spike_minutes)
     # No global spike expiry configured: defer to the desk/stage values.
     return get_expiry(desk_id=desk_id, stage_id=stage_id)
示例#25
0
 def _get_spike_expiry(self, desk_id, stage_id):
     """
     Work out the expiry to apply to a spiked item.

     A configured SPIKE_EXPIRY_MINUTES setting wins; when that setting is
     None the desk/stage expiry from get_expiry is used instead.
     :param desk_id: id of the desk, forwarded to get_expiry
     :param stage_id: id of the stage, forwarded to get_expiry
     :return: the result of get_expiry or get_expiry_date
     """
     spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
     if spike_minutes is not None:
         return get_expiry_date(spike_minutes)
     # No global spike expiry configured: defer to the desk/stage values.
     return get_expiry(desk_id=desk_id, stage_id=stage_id)
示例#26
0
    def test_query_getting_overdue_scheduled_content(self):
        """Expect exactly one overdue scheduled item among five archive fixtures.

        The fixtures mix states ('published', 'scheduled', 'spiked') with
        publish_schedule values before, at and after the current time, plus
        one document that has no publish_schedule at all.
        """
        self.app.data.insert(ARCHIVE, [{
            'publish_schedule': get_expiry_date(-10),
            'state': 'published'
        }])
        self.app.data.insert(ARCHIVE, [{
            'publish_schedule': get_expiry_date(-10),
            'state': 'scheduled'
        }])
        self.app.data.insert(ARCHIVE, [{
            'publish_schedule': get_expiry_date(0),
            'state': 'spiked'
        }])
        self.app.data.insert(ARCHIVE, [{
            'publish_schedule': get_expiry_date(10),
            'state': 'scheduled'
        }])
        self.app.data.insert(ARCHIVE, [{'unique_id': 97, 'state': 'spiked'}])

        now = date_to_str(utcnow())
        overdue_items = get_overdue_scheduled_items(now, 'archive')
        # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
        self.assertEqual(1, overdue_items.count())
示例#27
0
    def test_query_removing_media_files_keeps(self):
        """remove_media_files must report False while other collections hold the same renditions."""
        self.app.data.insert(ARCHIVE, [{
            'state': 'spiked',
            'expiry': get_expiry_date(-10),
            'type': 'picture',
            'renditions': self.media,
        }])

        # The same renditions are stored in four further collections.
        other_holders = (
            ('ingest', {}),
            ('archive_versions', {}),
            ('legal_archive', {'_id': 1}),
            ('legal_archive_versions', {'_id': 1}),
        )
        for resource, extra in other_holders:
            self.app.data.insert(resource, [dict(extra, type='picture', renditions=self.media)])

        archive_items = self.app.data.find_all('archive', None)
        self.assertEqual(archive_items.count(), 1)
        self.assertFalse(remove_media_files(archive_items[0]))
示例#28
0
    def test_query_removing_media_files_keeps(self):
        """remove_media_files must report False while other collections hold the same renditions."""
        with self.app.app_context():
            spiked_picture = {
                "state": "spiked",
                "expiry": get_expiry_date(-10),
                "type": "picture",
                "renditions": self.media,
            }
            self.app.data.insert(ARCHIVE, [spiked_picture])

            # The same renditions are stored in four further collections.
            other_holders = (
                ("ingest", {}),
                ("archive_versions", {}),
                ("legal_archive", {"_id": 1}),
                ("legal_archive_versions", {"_id": 1}),
            )
            for resource, extra in other_holders:
                self.app.data.insert(resource, [dict(extra, type="picture", renditions=self.media)])

            archive_items = self.app.data.find_all("archive", None)
            self.assertEqual(archive_items.count(), 1)
            self.assertFalse(remove_media_files(archive_items[0]))
示例#29
0
    def update(self, id, updates, original):
        """Spike the item with the given id.

        Sets the expiry and revert state on ``updates``, persists through the
        backend and pushes an 'item:spike' notification.

        :return: the item returned by the backend update.
        """
        user = get_user(required=True)
        archived = get_resource_service(ARCHIVE).find_one(req=None, _id=id)

        # Global default; a desk-level 'spike_expiry' takes precedence when the item sits on a desk.
        spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
        if is_assigned_to_a_desk(archived):
            owning_desk = get_resource_service('desks').find_one(_id=archived['task']['desk'], req=None)
            spike_minutes = owning_desk.get('spike_expiry', spike_minutes)

        updates[EXPIRY] = get_expiry_date(spike_minutes)
        updates[REVERT_STATE] = archived.get(app.config['CONTENT_STATE'], None)

        spiked = self.backend.update(self.datasource, id, updates, original)
        push_notification('item:spike', item=str(spiked.get('_id')), user=str(user))
        return spiked
示例#30
0
    def update(self, id, updates):
        """Spike the item with the given id.

        Sets the expiry and revert state on ``updates``, persists through the
        backend and pushes an 'item:spike' notification.

        :return: the item returned by the backend update.
        """
        user = get_user(required=True)
        archived = get_resource_service(ARCHIVE).find_one(req=None, _id=id)

        # Global default; a desk-level 'spike_expiry' takes precedence when the item sits on a desk.
        spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
        if is_assigned_to_a_desk(archived):
            owning_desk = get_resource_service('desks').find_one(_id=archived['task']['desk'], req=None)
            spike_minutes = owning_desk.get('spike_expiry', spike_minutes)

        updates[EXPIRY] = get_expiry_date(spike_minutes)
        updates[REVERT_STATE] = archived.get(app.config['CONTENT_STATE'], None)

        spiked = self.backend.update(self.datasource, id, updates)
        push_notification('item:spike', item=str(spiked.get('_id')), user=str(user))
        return spiked
示例#31
0
    def spike(self, filter, user):
        """Mark the item matching ``filter`` as spiked and give it a spike expiry.

        Raises SuperdeskError when no item matches or can_lock refuses the user.

        :return: the item re-read from the model after the update.
        """
        item_model = get_model(ItemModel)
        item = item_model.find_one(filter)
        if not item or not can_lock(item, user):
            raise SuperdeskError("Item couldn't be spiked. It is locked by another user")

        # Global default; a desk-level 'spike_expiry' takes precedence when the item sits on a desk.
        spike_minutes = app.settings['SPIKE_EXPIRY_MINUTES']
        if "task" in item and "desk" in item["task"]:
            owning_desk = get_resource_service('desks').find_one(_id=item["task"]["desk"], req=None)
            spike_minutes = owning_desk.get('spike_expiry', spike_minutes)

        item_model.update(filter, {IS_SPIKED: True, EXPIRY: get_expiry_date(spike_minutes)})
        push_notification('item:spike', item=str(item.get('_id')), user=str(user))
        return item_model.find_one(filter)
示例#32
0
def get_item_expiry(desk, stage, offset=None):
    """
    Work out when an item expires.

    The expiry length in minutes is taken from the first of these that is set:
    1. the stage's 'content_expiry'
    2. the desk's 'content_expiry'
    3. the 'CONTENT_EXPIRY_MINUTES' setting
    :param dict desk: desk where the item is located
    :param dict stage: stage where the item is located
    :param datetime offset: datetime passed in case of embargo.
    :return datetime: expiry datetime
    """
    minutes = app.settings['CONTENT_EXPIRY_MINUTES']
    # Stage is checked before desk; the first truthy 'content_expiry' wins.
    for container in (stage, desk):
        if container and container.get('content_expiry'):
            minutes = container.get('content_expiry')
            break

    return get_expiry_date(minutes, offset=offset)
示例#33
0
    def test_remove_renditions_from_all_versions(self):
        """Deleting an article must delete media referenced only by its stored version."""
        renditions = copy.copy(self.media)

        # The archive item itself has no renditions; only its version does.
        spiked_picture = {
            'state': 'spiked',
            'expiry': get_expiry_date(-10),
            'type': 'picture',
            'renditions': {},
        }
        ids = self.app.data.insert(ARCHIVE, [spiked_picture])

        picture_version = {
            '_id_document': ids[0],
            'type': 'picture',
            'renditions': renditions,
        }
        self.app.data.insert('archive_versions', [picture_version])

        with patch.object(self.app.media, 'delete') as media_delete:
            get_resource_service('archive').delete_by_article_ids(ids)
            for rendition in renditions.values():
                media_delete.assert_any_call(rendition['media'])
示例#34
0
    def test_remove_renditions_from_all_versions(self):
        """Deleting an article must delete media referenced only by its stored version."""
        renditions = copy.copy(self.media)

        # The archive item itself has no renditions; only its version does.
        spiked_picture = {
            'state': 'spiked',
            'expiry': get_expiry_date(-10),
            'type': 'picture',
            'renditions': {},
        }
        ids = self.app.data.insert(ARCHIVE, [spiked_picture])

        picture_version = {
            '_id_document': ids[0],
            'type': 'picture',
            'renditions': renditions,
        }
        self.app.data.insert('archive_versions', [picture_version])

        with patch.object(self.app.media, 'delete') as media_delete:
            get_resource_service('archive').delete_by_article_ids(ids)
            for rendition in renditions.values():
                media_delete.assert_any_call(rendition['media'])
示例#35
0
    def on_update(self, updates, original):
        """Pre-update hook for a stage.

        A content_expiry of 0 is replaced by the CONTENT_EXPIRY_MINUTES
        setting. When the update carries a truthy content_expiry, the expiry
        of every document returned by get_stage_documents is recomputed from
        its 'versioncreated'. The default-incoming flag may be switched on
        (replacing the previous default), but switching it off on the current
        holder raises a forbidden error.
        """
        if updates.get('content_expiry') == 0:
            updates['content_expiry'] = app.settings['CONTENT_EXPIRY_MINUTES']
        super().on_update(updates, original)

        if updates.get('content_expiry', None):
            for stage_doc in self.get_stage_documents(str(original['_id'])):
                new_expiry = get_expiry_date(updates['content_expiry'], stage_doc['versioncreated'])
                get_model(ItemModel).update({'_id': stage_doc['_id']}, {'expiry': new_expiry})

        if updates.get('default_incoming', False):
            # Newly flagged as default incoming stage: take over from the old one.
            if not original.get('default_incoming'):
                self.remove_old_default(original.get('desk'), 'default_incoming')
                self.set_desk_ref(original, 'incoming_stage')
        elif original.get('default_incoming') and 'default_incoming' in updates:
            raise SuperdeskApiError.forbiddenError(message='Must have one incoming stage in a desk')
示例#36
0
 def test_get_expiry_date_bad_offset_raises_error(self):
     """A string offset must make get_expiry_date raise TypeError."""
     bad_offset = '01.02.2013 13:30'
     with assert_raises(TypeError):
         get_expiry_date(minutes=5, offset=bad_offset)
示例#37
0
 def test_get_global_content_expiry(self):
     """get_item_expiry(self.app, None) must match get_expiry_date(99) in hour and minute."""
     calculated_minutes = get_item_expiry(self.app, None)
     reference_minutes = get_expiry_date(99)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12).
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#38
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest one provider item, together with its associations.

    ``item`` is modified in place: provider id, source, state and expiry are
    stamped on it, categories/subjects are processed, the rule set is applied
    and renditions are updated. Associations that carry a ``guid`` are linked
    to an already ingested item or ingested recursively. The item is then
    patched over the existing stored item (matched by guid) or posted as new,
    and the routing scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its id, ``source`` and ``content_expiry`` are read.
    :param feeding_service: supplies the target collection through its ``service``
        attribute (``'ingest'`` when absent) and prepares rendition hrefs.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :return: ``(True, items_ids)`` on success, ``(False, [])`` when any exception
        was raised (the exception is logged and reported via ``ProviderError``).
    """
    items_ids = []
    try:
        # the feeding service may target its own collection; default is 'ingest'
        ingest_collection = feeding_service.service if hasattr(feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # unseen guid: make sure the item has an id and use it as its family id
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        # a missing or falsy provider 'content_expiry' falls back to INGEST_EXPIRY_MINUTES
        item['expiry'] = get_expiry_date(provider.get('content_expiry') or app.config['INGEST_EXPIRY_MINUTES'],
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                # NOTE(review): 'mimetype' is read from the renditions dict, not from the
                # chosen rendition - confirm this is intended
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            if assoc.get('renditions'):
                transfer_renditions(assoc['renditions'])
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            if guid:
                lookup = {'guid': guid}
                ingested = ingest_service.get_from_mongo(req=None, lookup=lookup)
                if ingested.count() >= 1:
                    # already ingested: reuse the id of the first match
                    assoc['_id'] = ingested[0]['_id']
                    for rendition in ingested[0].get('renditions', {}):  # add missing renditions
                        assoc['renditions'].setdefault(
                            rendition,
                            ingested[0]['renditions'][rendition])
                else:  # there is no such item in the system - ingest it
                    # recursive call; routing_scheme is not passed for associations
                    status, ids = ingest_item(assoc, provider, feeding_service, rule_set)
                    if status:
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)

        new_version = True
        if old_item:
            # patch a copy of the item over the stored one, then rebuild item as
            # stored fields overlaid with the incoming ones
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                # log with collection context, then let the outer handler report the failure
                logger.error('Exception while persisting item in %s collection: %s', ingest_collection, e)
                raise e

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
示例#39
0
def get_item_expiry(app, stage):
    """Return the expiry date for an item, honouring a stage-level override.

    :param app: application object whose ``settings`` holds ``CONTENT_EXPIRY_MINUTES``.
    :param stage: stage dict or a falsy value; its ``content_expiry`` is used when present.
    :return: the result of ``get_expiry_date`` for the selected number of minutes.
    """
    default_minutes = app.settings['CONTENT_EXPIRY_MINUTES']
    if not stage:
        return get_expiry_date(default_minutes)
    return get_expiry_date(stage.get('content_expiry', default_minutes))
示例#40
0
def get_item_expiry(app, stage):
    """Compute an item's expiry date from the stage or the global setting.

    The stage's ``content_expiry`` is used when a stage is given and defines
    it; otherwise the application's ``CONTENT_EXPIRY_MINUTES`` applies.

    :param app: application object exposing ``settings``.
    :param stage: stage dict, or a falsy value when there is no stage.
    :return: whatever ``get_expiry_date`` returns for the chosen minutes.
    """
    global_minutes = app.settings['CONTENT_EXPIRY_MINUTES']
    minutes = stage.get('content_expiry', global_minutes) if stage else global_minutes
    return get_expiry_date(minutes)
示例#41
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None):
    """Ingest one provider item into the ingest collection.

    ``item`` is modified in place (provider id, source, state, expiry,
    categories/subjects, rule set, renditions) and then either patched over
    the stored item with the same guid or posted as a new one. The routing
    scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its id, ``source`` and ``content_expiry`` are read.
    :param feeding_service: supplies the target collection through its ``service``
        attribute (``'ingest'`` when absent) and prepares rendition hrefs.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :return: ``(True, items_ids)`` on success, plain ``False`` when an exception was raised.
        NOTE(review): the two return shapes differ, so callers that unpack the
        result will fail on the error path - confirm how callers consume it.
    """
    try:
        # the feeding service may target its own collection; default is 'ingest'
        ingest_collection = feeding_service.service if hasattr(
            feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # unseen guid: make sure the item has an id and use it as its family id
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        # the configured default is used only when the provider has no 'content_expiry'
        # key at all; a stored 0/None is passed through as is
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry',
                         app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'],
                                                    rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        items_ids = []
        if old_item:
            # patch a copy of the item over the stored one, then rebuild item as
            # stored fields overlaid with the incoming ones
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids = [item['_id']]
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get(
                    'version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids = ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the error is only logged: execution continues with an empty
                # items_ids and the function can still return True
                logger.error(
                    'Exception while persisting item in %s collection: %s',
                    ingest_collection, e)

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        return False
    return True, items_ids
示例#42
0
 def test_get_stage_content_expiry(self):
     """A stage ``content_expiry`` of 10 yields the same expiry as ``get_expiry_date(10)``.

     Only the hour and minute are compared, because the two dates are
     computed at slightly different moments.
     """
     stage = {"content_expiry": 10}
     calculated_minutes = get_item_expiry(desk=None, stage=stage)
     reference_minutes = get_expiry_date(10)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#43
0
 def test_get_expiry_date_none(self):
     """``get_expiry_date`` returns ``None`` for ``0`` minutes and for ``None``."""
     for no_expiry in (0, None):
         self.assertIsNone(get_expiry_date(no_expiry))
示例#44
0
 def test_get_stage_content_expiry(self):
     """A stage ``content_expiry`` of 10 yields the same expiry as ``get_expiry_date(10)``.

     Only the hour and minute are compared, because the two dates are
     computed at slightly different moments.
     """
     stage = {"content_expiry": 10}
     calculated_minutes = get_item_expiry(self.app, stage)
     reference_minutes = get_expiry_date(10)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#45
0
 def test_get_global_content_expiry(self):
     """Without a stage, the item expiry equals ``get_expiry_date(99)``.

     Only the hour and minute are compared, because the two dates are
     computed at slightly different moments.
     NOTE(review): 99 is presumably the CONTENT_EXPIRY_MINUTES value of the
     test application - confirm against the test settings.
     """
     calculated_minutes = get_item_expiry(self.app, None)
     reference_minutes = get_expiry_date(99)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#46
0
 def test_get_expiry_date_bad_offset_raises_error(self):
     """Passing a date string as ``offset`` makes ``get_expiry_date`` raise ``TypeError``."""
     string_offset = '01.02.2013 13:30'
     with assert_raises(TypeError):
         get_expiry_date(minutes=5, offset=string_offset)
示例#47
0
 def test_get_global_content_expiry(self):
     """Without desk or stage, the expiry follows the app's ``CONTENT_EXPIRY_MINUTES``.

     Only the hour and minute are compared, because the two dates are
     computed at slightly different moments.
     """
     calculated_minutes = get_item_expiry(desk=None, stage=None)
     reference_minutes = get_expiry_date(
         self.ctx.app.config["CONTENT_EXPIRY_MINUTES"])
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#48
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None):
    """Ingest one provider item, together with its associations.

    ``item`` is modified in place: provider id, source, uri, profile, state
    and expiry are stamped on it, categories/subjects are processed, the rule
    set is applied and renditions are updated. Associations with a ``guid``
    are linked to an already ingested item or ingested recursively;
    associations with only a ``residRef`` are replaced by ``resolve_ref``.
    The item is then patched over the stored item with the same guid or
    posted as new, and the routing scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its id, ``source`` and ``content_expiry`` are read.
    :param feeding_service: supplies the target collection through its ``service``
        attribute (``'ingest'`` when absent) and prepares rendition hrefs.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :return: ``(True, items_ids)`` on success, ``(False, [])`` when any exception
        was raised (the exception is logged and reported via ``ProviderError``).
    """
    items_ids = []
    try:
        # the feeding service may target its own collection; default is 'ingest'
        ingest_collection = feeding_service.service if hasattr(
            feeding_service, 'service') else 'ingest'
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # unseen guid: make sure the item has an id and use it as its family id
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        item.setdefault('uri', item[GUID_FIELD])  # keep it as original guid

        if item.get('profile'):
            # convert the profile to an ObjectId when possible, keep it untouched otherwise
            try:
                item['profile'] = bson.ObjectId(item['profile'])
            except bson.errors.InvalidId:
                pass

        set_default_state(item, CONTENT_STATE.INGESTED)
        # a missing or falsy provider 'content_expiry' falls back to INGEST_EXPIRY_MINUTES
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry')
            or app.config['INGEST_EXPIRY_MINUTES'], item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            if not app.config.get('INGEST_SKIP_IPTC_CODES', False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    'media'):  # if there is media should be processed already
                href = feeding_service.prepare_href(baseImageRend['href'],
                                                    rend.get('mimetype'))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get('associations', {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get('guid')
            assoc_name = assoc.get('headline') or assoc.get('slugline') or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    # only report "ingested before" once we know a stored item exists
                    logger.info('assoc ingested before %s', assoc_name)
                    assoc['_id'] = ingested['_id']
                    if is_new_version(assoc, ingested) and assoc.get(
                            'renditions'):  # new version
                        logger.info(
                            'new assoc version - re-transfer renditions for %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    else:
                        logger.info(
                            'same/old version - use already fetched renditions for %s',
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get('renditions') and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            'new association  with system renditions - transfer %s',
                            assoc_name)
                        transfer_renditions(assoc['renditions'])
                    # recursive call; routing_scheme is not passed for associations
                    status, ids = ingest_item(assoc, provider, feeding_service,
                                              rule_set)
                    if status:
                        assoc['_id'] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get('residRef'):
                item['associations'][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            # patch a copy of the item over the stored one, then rebuild item as
            # stored fields overlaid with the incoming ones
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids.append(item['_id'])
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                # log with collection context, then let the outer handler report the failure
                logger.error(
                    'Exception while persisting item in %s collection: %s',
                    ingest_collection, e)
                raise

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids
示例#49
0
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    """Ingest one provider item into the ``ingest`` collection.

    ``item`` is modified in place (provider id, source, state, expiry,
    categories/subjects, rule set, renditions) and then either patched over
    the stored item with the same guid or posted as a new one. The routing
    scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its id, ``source`` and ``content_expiry`` are read.
    :param feeding_service: used to prepare rendition hrefs.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :return: ``True`` on success, ``False`` when an exception was raised
        (the exception is logged and, when possible, sent to sentry).
    """
    try:
        ingest_service = superdesk.get_resource_service('ingest')

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # unseen guid: make sure the item has an id and use it as its family id
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        # NOTE(review): the configured default is used only when the provider has no
        # 'content_expiry' key at all; a stored 0/None is passed through - confirm intended
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked as killed and handed to ingest_cancel
        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            # patch a copy of the item over the stored one, then rebuild item as
            # stored fields overlaid with the incoming ones
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a %s placeholder: passing e without one makes
                # logging report a formatting error instead of the message
                logger.error("Exception while persisting item in ingest collection: %s", e)

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # best effort: a failure to report to sentry must not mask the ingest
            # failure; Exception (not a bare except) lets KeyboardInterrupt/SystemExit through
            pass
        return False
    return True
示例#50
0
 def test_get_expiry_date_overflow(self):
     """A huge minutes value gives ``None``, with and without an explicit offset."""
     huge_minutes = 9999999999999
     without_offset = get_expiry_date(huge_minutes)
     self.assertIsNone(without_offset)
     with_offset = get_expiry_date(huge_minutes, utcnow())
     self.assertIsNone(with_offset)
示例#51
0
 def test_get_desk_content_expiry(self):
     """A desk ``content_expiry`` of 10 yields the same expiry as ``get_expiry_date(10)``.

     Only the hour and minute are compared, because the two dates are
     computed at slightly different moments.
     """
     desk = {"content_expiry": 10}
     calculated_minutes = get_item_expiry(desk=desk, stage=None)
     reference_minutes = get_expiry_date(10)
     # assertEqual replaces the deprecated assertEquals alias (removed in Python 3.12)
     self.assertEqual(calculated_minutes.hour, reference_minutes.hour)
     self.assertEqual(calculated_minutes.minute, reference_minutes.minute)
示例#52
0
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    """Ingest one provider item into the ``ingest`` collection.

    ``item`` is modified in place (id, family id, provider id, source, state,
    expiry, categories/subjects, rule set, provider sequence, renditions) and
    then either written over the stored item with the same guid or posted as
    a new one. The routing scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its ``type``, id, ``source`` and
        ``content_expiry`` are read. It is also assigned to the matching
        entry of the module-level ``providers`` mapping.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :return: ``True`` on success, ``False`` when an exception was raised
        (the exception is logged and, when possible, sent to sentry).
    """
    try:
        item.setdefault(superdesk.config.ID_FIELD,
                        generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        # NOTE(review): the configured default is used only when the provider has no
        # 'content_expiry' key at all; a stored 0/None is passed through - confirm intended
        item['expiry'] = get_expiry_date(
            provider.get('content_expiry',
                         app.config['INGEST_EXPIRY_MINUTES']),
            item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(
                    baseImageRend['href'])
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[
                superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get(
                    'version') == old_item.get('version'):
                new_version = False
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                # the message needs a %s placeholder: passing e without one makes
                # logging report a formatting error instead of the message
                logger.error(
                    "Exception while persisting item in ingest collection: %s",
                    e)

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                'routing_schemes').apply_routing_scheme(
                    routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            # best effort: a failure to report to sentry must not mask the ingest
            # failure; Exception (not a bare except) lets KeyboardInterrupt/SystemExit through
            pass
        return False
    return True
示例#53
0
def ingest_item(item,
                provider,
                feeding_service,
                rule_set=None,
                routing_scheme=None,
                expiry=None):
    """Ingest one provider item, together with its associations.

    ``item`` is modified in place: provider id, source, uri, profile, state
    and expiry are stamped on it, categories/subjects are processed, the rule
    set is applied and renditions are updated. Associations with a ``guid``
    are linked to an already ingested item (whose expiry is updated to the
    parent's) or ingested recursively with the parent's expiry; associations
    with only a ``residRef`` are replaced by ``resolve_ref``. The item is then
    patched over the stored item with the same guid or posted as new, and the
    routing scheme, if given, is applied to new versions.

    :param item: item dict to ingest.
    :param provider: provider dict; its id, ``source`` and ``content_expiry`` are read.
    :param feeding_service: passed to ``get_ingest_collection`` and used to
        prepare rendition hrefs.
    :param rule_set: optional rule set passed to ``apply_rule_set``.
    :param routing_scheme: optional routing scheme applied when the item is a new version.
    :param expiry: optional expiry to use instead of a computed one; the
        recursive call for an association passes the parent item's expiry.
    :return: ``(True, items_ids)`` on success, ``(False, [])`` when any exception
        was raised (the exception is logged and reported via ``ProviderError``).
    """
    items_ids = []
    try:
        ingest_collection = get_ingest_collection(feeding_service, item)
        ingest_service = superdesk.get_resource_service(ingest_collection)

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            # unseen guid: make sure the item has an id and use it as its family id
            item.setdefault(superdesk.config.ID_FIELD,
                            generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault("source", provider.get("source", ""))
        item.setdefault("uri", item[GUID_FIELD])  # keep it as original guid

        if item.get("profile"):
            # convert the profile to an ObjectId when possible, keep it untouched otherwise
            try:
                item["profile"] = bson.ObjectId(item["profile"])
            except bson.errors.InvalidId:
                pass
            profile = superdesk.get_resource_service("content_types").find_one(
                req=None, _id=item["profile"])
            if not profile:  # unknown profile
                item.pop("profile")

        set_default_state(item, CONTENT_STATE.INGESTED)
        # an explicit expiry wins; otherwise a missing or falsy provider
        # 'content_expiry' falls back to INGEST_EXPIRY_MINUTES
        item["expiry"] = (
            get_expiry_date(
                provider.get("content_expiry")
                or app.config["INGEST_EXPIRY_MINUTES"],
                item.get("versioncreated")) if not expiry else expiry
        )  # when fetching associated item set expiry to match parent

        if "anpa_category" in item:
            process_anpa_category(item, provider)

        if "subject" in item:
            if not app.config.get("INGEST_SKIP_IPTC_CODES", False):
                # FIXME: temporary fix for SDNTB-344, need to be removed once SDESK-439 is implemented
                process_iptc_codes(item, provider)
            if "anpa_category" not in item:
                derive_category(item, provider)
        elif "anpa_category" in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        # a canceled item is marked as killed and handed to ingest_cancel
        if item.get("pubstatus", "") == "canceled":
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item, feeding_service)

        rend = item.get("renditions", {})
        if rend:
            # prefer the 'baseImage' rendition, otherwise take the first one available
            baseImageRend = rend.get("baseImage") or next(iter(rend.values()))
            if baseImageRend and not baseImageRend.get(
                    "media"):  # if there is media should be processed already
                href = feeding_service.prepare_href(baseImageRend["href"],
                                                    rend.get("mimetype"))
                update_renditions(item, href, old_item)

        # if the item has associated media
        for key, assoc in item.get("associations", {}).items():
            set_default_state(assoc, CONTENT_STATE.INGESTED)
            # wire up the id of the associated feature media to the ingested one
            guid = assoc.get("guid")
            assoc_name = assoc.get("headline") or assoc.get("slugline") or guid
            if guid:
                ingested = ingest_service.find_one(req=None, guid=guid)
                if ingested is not None:
                    logger.info("assoc ingested before %s", assoc_name)
                    assoc["_id"] = ingested["_id"]
                    # update expiry so assoc will stay as long as the item using it
                    ingest_service.system_update(ingested["_id"],
                                                 {"expiry": item["expiry"]},
                                                 ingested)
                    if is_new_version(assoc, ingested) and assoc.get(
                            "renditions"):  # new version
                        logger.info(
                            "new assoc version - re-transfer renditions for %s",
                            assoc_name)
                        # a failed transfer is logged and ingestion continues
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to update associated item renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    else:
                        logger.info(
                            "same/old version - use already fetched renditions for %s",
                            assoc_name)
                        update_assoc_renditions(assoc, ingested)
                else:  # there is no such item in the system - ingest it
                    if assoc.get("renditions") and has_system_renditions(
                            assoc):  # all set, just download
                        logger.info(
                            "new association with system renditions - transfer %s",
                            assoc_name)
                        # a failed download is logged and ingestion continues
                        try:
                            transfer_renditions(assoc["renditions"])
                        except SuperdeskApiError:
                            logger.exception(
                                "failed to download renditions",
                                extra=dict(
                                    guid=guid,
                                    name=assoc_name,
                                ),
                            )
                    # recursive call with the parent's expiry; routing_scheme is
                    # not passed for associations
                    status, ids = ingest_item(assoc,
                                              provider,
                                              feeding_service,
                                              rule_set,
                                              expiry=item["expiry"])
                    if status:
                        assoc["_id"] = ids[0]
                        items_ids.extend(ids)
                        ingested = ingest_service.find_one(req=None,
                                                           _id=ids[0])
                        update_assoc_renditions(assoc, ingested)
            elif assoc.get("residRef"):
                item["associations"][key] = resolve_ref(assoc)

        new_version = True
        if old_item:
            new_version = is_new_version(item, old_item)
            # patch a copy of the item over the stored one, then rebuild item as
            # stored fields overlaid with the incoming ones
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD],
                                          updates, old_item)
            item.update(old_item)
            item.update(updates)
            items_ids.append(item["_id"])
        else:
            if item.get("ingest_provider_sequence") is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                items_ids.extend(ingest_service.post_in_mongo([item]))
            except HTTPException as e:
                # log with collection context, then let the outer handler report the failure
                logger.error(
                    "Exception while persisting item in %s collection: %s",
                    ingest_collection, e)
                raise e

        # routing is only applied to new versions, on the item as re-read from the service
        if routing_scheme and new_version:
            routed = ingest_service.find_one(
                _id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service(
                "routing_schemes").apply_routing_scheme(
                    routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        ProviderError.ingestItemError(ex, provider, item=item)
        return False, []
    return True, items_ids