Example No. 1
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc:
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE and not doc.get('ingest_provider'):
            # set the source for the article
            set_default_source(doc)

        copy_metadata_from_user_preferences(doc, repo_type)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE', None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE
Example No. 2
    def _duplicate_planning(self, original):
        new_plan = deepcopy(original)

        if new_plan.get('expired') and new_plan.get('event_item'):
            # If the Planning item has expired and is associated with an Event
            # then we remove the link to the associated Event as the Event would have
            # been expired also
            del new_plan['event_item']

        for f in ('_id', 'guid', 'lock_user', 'lock_time', 'original_creator',
                  '_planning_schedule',
                  'lock_session', 'lock_action', '_created', '_updated',
                  '_etag', 'pubstatus', 'expired', 'featured'):
            new_plan.pop(f, None)

        new_plan[ITEM_STATE] = WORKFLOW_STATE.DRAFT
        new_plan['guid'] = generate_guid(type=GUID_NEWSML)

        planning_datetime = new_plan.get('planning_date')
        current_date = get_local_end_of_day().date()
        if planning_datetime.date() < current_date:
            new_plan['planning_date'] = planning_datetime.replace(
                day=current_date.day,
                month=current_date.month,
                year=current_date.year)

        for cov in new_plan.get('coverages') or []:
            cov.pop('assigned_to', None)
            cov.setdefault('planning', {})['scheduled'] = new_plan.get('planning_date')
            cov['coverage_id'] = generate_guid(type=GUID_NEWSML)
            cov['workflow_status'] = WORKFLOW_STATE.DRAFT
            cov['news_coverage_status'] = {'qcode': 'ncostat:int'}

        return new_plan
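
The date handling above rolls a past planning_date forward to today while keeping the original time of day. Below is a minimal, self-contained sketch of that logic, with plain datetime/date standing in for get_local_end_of_day(), which is assumed to return a timezone-aware end-of-day datetime:

from datetime import datetime, date

def roll_forward(planning_datetime, today):
    # If the planned date is in the past, move it to today's date
    # but keep the original time of day untouched.
    if planning_datetime.date() < today:
        return planning_datetime.replace(
            day=today.day, month=today.month, year=today.year)
    return planning_datetime

# A plan scheduled for 09:30 on a past date lands on today at 09:30.
print(roll_forward(datetime(2020, 1, 5, 9, 30), date.today()))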
Example No. 3
def generate_recurring_events(event):
    generated_events = []
    setRecurringMode(event)

    # Get the recurrence_id, or generate one if it doesn't exist
    recurrence_id = event.get('recurrence_id', generate_guid(type=GUID_NEWSML))

    # compute the difference between start and end in the original event
    time_delta = event['dates']['end'] - event['dates']['start']
    # for all the dates based on the recurring rules:
    for date in itertools.islice(generate_recurring_dates(
            start=event['dates']['start'],
            tz=event['dates'].get('tz') and pytz.timezone(event['dates']['tz'] or None),
            **event['dates']['recurring_rule']
    ), 0, 200):  # set a limit to prevent too many events from being created
        # create event with the new dates
        new_event = copy.deepcopy(event)
        new_event['dates']['start'] = date
        new_event['dates']['end'] = date + time_delta
        # set a unique guid
        new_event['guid'] = generate_guid(type=GUID_NEWSML)
        new_event['_id'] = new_event['guid']
        # set the recurrence id
        new_event['recurrence_id'] = recurrence_id

        # set expiry date
        overwrite_event_expiry_date(new_event)

        generated_events.append(new_event)

    return generated_events
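
generate_recurring_dates is not shown in this listing; as a rough, self-contained sketch of the same expansion idea, the snippet below uses dateutil.rrule as a stand-in and preserves the original event's duration via time_delta, just like the loop above:

import copy
from datetime import datetime
from dateutil import rrule  # stand-in for generate_recurring_dates (assumed available)

def expand_recurring(event, count=3):
    time_delta = event['dates']['end'] - event['dates']['start']
    new_events = []
    for date in rrule.rrule(rrule.DAILY, dtstart=event['dates']['start'], count=count):
        new_event = copy.deepcopy(event)          # copy, then shift the dates
        new_event['dates']['start'] = date
        new_event['dates']['end'] = date + time_delta
        new_events.append(new_event)
    return new_events

event = {'dates': {'start': datetime(2024, 1, 1, 9), 'end': datetime(2024, 1, 1, 10)}}
for e in expand_recurring(event):
    print(e['dates']['start'], '->', e['dates']['end'])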
Example No. 4
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if "unique_id" not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if "family_id" not in doc:
            doc["family_id"] = doc[GUID_FIELD]

        if "event_id" not in doc:
            doc["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])
        set_dateline(doc, repo_type)
        set_byline(doc, repo_type)
        set_sign_off(doc, repo_type=repo_type)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE
Example No. 5
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc:
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        copy_metadata_from_user_preferences(doc, repo_type)

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE
Example No. 6
def on_duplicate_item(doc, original_doc, operation=None):
    """Make sure duplicated item has basic fields populated."""

    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc['event_id'] = generate_guid(type=GUID_TAG)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = operation or ITEM_DUPLICATE
    doc['original_id'] = original_doc.get('item_id', original_doc.get('_id'))
    set_default_source(doc)
Example No. 7
def on_duplicate_item(doc, original_doc):
    """Make sure duplicated item has basic fields populated."""

    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc['event_id'] = generate_guid(type=GUID_TAG)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = ITEM_DUPLICATE
    doc['original_id'] = original_doc.get('item_id', original_doc.get('_id'))
    set_default_source(doc)
Example No. 8
def on_duplicate_item(doc, original_doc, operation=None):
    """Make sure duplicated item has basic fields populated."""

    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc["event_id"] = generate_guid(type=GUID_TAG)
    doc.setdefault("_id", doc[GUID_FIELD])
    set_sign_off(doc)
    doc["force_unlock"] = True
    doc[ITEM_OPERATION] = operation or ITEM_DUPLICATE
    doc["original_id"] = original_doc.get("item_id", original_doc.get("_id"))
    set_default_source(doc)
Example No. 9
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        editor_utils.generate_fields(doc)
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if "unique_id" not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if "family_id" not in doc:
            doc["family_id"] = doc[GUID_FIELD]

        if "event_id" not in doc and repo_type != "ingest":
            doc["event_id"] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if "profile" not in doc and app.config.get("DEFAULT_CONTENT_TYPE",
                                                   None):
            doc["profile"] = app.config.get("DEFAULT_CONTENT_TYPE", None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if "language" not in doc:
            doc["language"] = app.config.get("DEFAULT_LANGUAGE", "en")

            if doc.get("task", None) and doc["task"].get("desk", None):
                desk = superdesk.get_resource_service("desks").find_one(
                    req=None, _id=doc["task"]["desk"])
                if desk and desk.get("desk_language", None):
                    doc["language"] = desk["desk_language"]

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

        if doc.get("template"):
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import

            doc.pop("fields_meta", None)
            render_content_template_by_id(doc, doc["template"], update=True)
            editor_utils.generate_fields(doc)
Example No. 10
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        editor_utils.generate_fields(doc)
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc and repo_type != 'ingest':
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE',
                                                   None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if 'language' not in doc:
            doc['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            if doc.get('task', None) and doc['task'].get('desk', None):
                desk = superdesk.get_resource_service('desks').find_one(
                    req=None, _id=doc['task']['desk'])
                if desk and desk.get('desk_language', None):
                    doc['language'] = desk['desk_language']

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE

        if doc.get('template'):
            from apps.templates.content_templates import render_content_template_by_id  # avoid circular import
            doc.pop('fields_meta', None)
            render_content_template_by_id(doc, doc['template'], update=True)
            editor_utils.generate_fields(doc)
Example No. 11
    def create(self, docs, **kwargs):
        new_guids = []
        provider = get_resource_service("ingest_providers").find_one(source="aapmm", req=None)
        if provider and "config" in provider and "username" in provider["config"]:
            self.backend.set_credentials(provider["config"]["username"], provider["config"]["password"])
        for doc in docs:
            if not doc.get("desk"):
                # if no desk is selected then it is a bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")
            try:
                archived_doc = self.backend.find_one_raw(doc["guid"], doc["guid"])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc["_id"] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc["ingest_provider"] = str(provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get("desk"), stage_id=doc.get("stage"))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc["_id"]
            dest_doc[FAMILY_ID] = archived_doc["_id"]
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get("_id"))

        return new_guids
Example No. 12
    def _create_package(self, text_item, image_items):
        """
        Create a new content package from given content items.
        """
        package = {
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'guid': generate_guid(type=GUID_TAG,
                                  id=text_item.get('guid') + '-package'),
            'versioncreated': text_item['versioncreated'],
            'firstcreated': text_item.get('firstcreated'),
            'headline': text_item.get('headline', ''),
            'groups': [
                {
                    'id': 'root',
                    'role': 'grpRole:NEP',
                    'refs': [{'idRef': 'main'}],
                }, {
                    'id': 'main',
                    'role': 'main',
                    'refs': [],
                }
            ]
        }

        item_references = package['groups'][1]['refs']
        item_references.append({'residRef': text_item['guid']})

        for image in image_items:
            item_references.append({'residRef': image['guid']})

        return package
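
For reference, the reference list built above always puts the text item first, then the images. A stand-alone sketch with hypothetical guids (plain dicts, no Superdesk imports):

def build_refs(text_item, image_items):
    # Mirrors how _create_package fills the 'main' group:
    # the text reference precedes the image references.
    refs = [{'residRef': text_item['guid']}]
    refs.extend({'residRef': image['guid']} for image in image_items)
    return refs

print(build_refs({'guid': 'text-1'}, [{'guid': 'img-1'}, {'guid': 'img-2'}]))
# [{'residRef': 'text-1'}, {'residRef': 'img-1'}, {'residRef': 'img-2'}]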
Example No. 13
    def create(self, docs, **kwargs):
        new_guids = []
        provider = self.get_provider()
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is a bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")
            try:
                archived_doc = self.fetch(doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, provider)

            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc['_id'] = new_id
            generate_unique_id_and_name(dest_doc)

            if provider:
                dest_doc['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc['_id']
            dest_doc[FAMILY_ID] = archived_doc['_id']
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc.get('_id'))

        return new_guids
Example No. 14
    def _create_image_items(self, image_links, text_item):
        image_items = []

        for image_url in image_links:
            guid_hash = hashlib.sha1(image_url.encode('utf8')).hexdigest()
            img_item = {
                'guid': generate_guid(
                    type=GUID_TAG,
                    id=text_item.get('guid') + guid_hash + '-image'),
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'versioncreated': text_item.get('versioncreated'),
                'firstcreated': text_item.get('firstcreated'),
                'headline': text_item.get('headline', ''),
                'renditions': {
                    'baseImage': {
                        'href': image_url
                    }
                }
            }
            image_items.append(img_item)

        return image_items
Example No. 15
    def on_create(self, docs):
        """Set default metadata."""

        for doc in docs:
            if not doc.get('guid'):
                doc['guid'] = generate_guid(type=GUID_NEWSML)
            set_original_creator(doc)
Example No. 16
    def parse_item(self, image_path):
        filename = os.path.basename(image_path)
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        item = {'guid': guid,
                'uri': guid,
                config.VERSION: 1,
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'mimetype': content_type,
                'versioncreated': utcnow(),
                }
        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
            filemeta.set_filemeta(item, file_metadata)
            f.seek(0)

            metadata = get_meta_iptc(f)
            f.seek(0)
            self.parse_meta(item, metadata)

            rendition_spec = get_renditions_spec(no_custom_crops=True)
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec, url_for_media)
            item['renditions'] = renditions
        return item
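
Note the repeated f.seek(0) calls: each helper consumes the file stream, so it has to be rewound before the next read. A minimal illustration of why:

import io

f = io.BytesIO(b'image bytes')
print(f.read())   # b'image bytes' - the first consumer drains the stream
print(f.read())   # b'' - a second read without rewinding sees nothing
f.seek(0)         # rewind, as parse_item does between helpers
print(f.read())   # b'image bytes' again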
Example No. 17
    def update(self, id, updates, original):
        original_state = original[ITEM_STATE]
        if not is_workflow_state_transition_valid(ITEM_SPIKE, original_state):
            raise InvalidStateTransitionError()

        user = get_user(required=True)
        item = get_resource_service(ARCHIVE).find_one(req=None, _id=id)
        task = item.get('task', {})

        updates[EXPIRY] = self._get_spike_expiry(desk_id=task.get('desk'), stage_id=task.get('stage'))
        updates[REVERT_STATE] = item.get(ITEM_STATE, None)

        if original.get('rewrite_of'):
            updates['rewrite_of'] = None

        if original.get('rewritten_by'):
            updates['rewritten_by'] = None

        if original.get('broadcast'):
            updates['broadcast'] = None

        if original.get('rewrite_sequence'):
            updates['rewrite_sequence'] = None

        # remove any relation with linked items
        updates[ITEM_EVENT_ID] = generate_guid(type=GUID_TAG)

        # remove lock
        updates.update({
            'lock_user': None,
            'lock_session': None,
        })

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            # remove links from items in the package
            package_service = PackageService()
            items = package_service.get_item_refs(original)
            for item in items:
                package_item = get_resource_service(ARCHIVE).find_one(req=None, _id=item[GUID_FIELD])
                if package_item:
                    linked_in_packages = [linked for linked in package_item.get(LINKED_IN_PACKAGES, [])
                                          if linked.get(PACKAGE) != original.get(config.ID_FIELD)]
                    super().system_update(package_item[config.ID_FIELD],
                                          {LINKED_IN_PACKAGES: linked_in_packages},
                                          package_item)

            # keep the structure of old group in order to be able to unspike the package
            updates[DELETED_GROUPS] = original[GROUPS]
            # and remove all the items from the package
            updates['groups'] = []

        item = self.backend.update(self.datasource, id, updates, original)
        push_notification('item:spike', item=str(id), user=str(user.get(config.ID_FIELD)))

        history_updates = dict(updates)
        if original.get('task'):
            history_updates['task'] = original.get('task')
        app.on_archive_item_updated(history_updates, original, ITEM_SPIKE)
        self._removed_refs_from_package(id)
        return item
Example No. 18
    def on_create(self, docs, **kwargs):
        user_id = get_user(required=True)['_id']
        session_id = get_auth()['_id']

        existing_locks = list(self.find(where={}))
        for existing_lock in existing_locks:
            if str(existing_lock.get(LOCK_USER)) != str(user_id):
                raise SuperdeskApiError.forbiddenError(
                    message="Featured stories already being managed by another user.")
            elif str(existing_lock.get(LOCK_SESSION)) != str(session_id):
                raise SuperdeskApiError.forbiddenError(
                    message="Featured stories already being managed by you in another session.")

        # get the lock if not raise forbidden exception
        if not lock(LOCK_ID, expire=5):
            raise SuperdeskApiError.forbiddenError(
                message="Unable to obtain lock on Featured stories.")

        for doc in docs:
            doc['_id'] = generate_guid(type=GUID_NEWSML)
            lock_updates = {
                LOCK_USER: user_id,
                LOCK_SESSION: session_id,
                LOCK_TIME: utcnow()
            }
            doc.update(lock_updates)

        return docs
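
The two checks above distinguish "another user holds the lock" from "same user, different session". A compact stand-alone sketch of that decision, using a hypothetical Lock tuple instead of the stored documents:

from typing import NamedTuple

class Lock(NamedTuple):
    user: str
    session: str

def check_lock(existing, user, session):
    if existing is None:
        return  # nothing held, the caller may acquire the lock
    if existing.user != user:
        raise PermissionError("already managed by another user")
    if existing.session != session:
        raise PermissionError("already managed by you in another session")

check_lock(Lock('u1', 's1'), 'u1', 's1')  # same user and session: no error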
Example No. 19
    def on_create(self, docs):
        # events generated by recurring rules
        generated_events = []
        for event in docs:
            # generate a unique id
            if 'guid' not in event:
                event['guid'] = generate_guid(type=GUID_NEWSML)
            event['_id'] = event['guid']
            # set the author
            set_original_creator(event)

            # overwrite expiry date
            overwrite_event_expiry_date(event)

            # We ignore the 'update_method' on create
            if 'update_method' in event:
                del event['update_method']

            # generates events based on recurring rules
            if event['dates'].get('recurring_rule', None):
                generated_events.extend(generate_recurring_events(event))
                # remove the event that contains the recurring rule. We don't need it anymore
                docs.remove(event)
        if generated_events:
            docs.extend(generated_events)
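
One caveat worth flagging: docs.remove(event) inside "for event in docs" mutates the list being iterated, which in general skips the element that follows each removal. A safer equivalent iterates over a snapshot:

# Sketch: iterate over a copy so removals don't skip elements.
docs = [{'guid': 'a', 'recurring': True}, {'guid': 'b', 'recurring': True}]
for event in list(docs):          # list(docs) snapshots the input
    if event.get('recurring'):
        docs.remove(event)        # safe: we iterate the snapshot, not docs
print(docs)                       # [] - both recurring events were removed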
Example No. 20
    def setUp(self):
        super().setUp()
        dirname = os.path.dirname(os.path.realpath(__file__))
        image_path = os.path.normpath(
            os.path.join(dirname, "fixtures", self.filename))
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        self.item = {
            "guid": guid,
            "version": 1,
            "_id": guid,
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            "mimetype": content_type,
            "versioncreated": datetime.now(),
        }

        with open(image_path, "rb") as f:
            _, content_type, file_metadata = process_file_from_stream(
                f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f,
                                    filename=self.filename,
                                    content_type=content_type,
                                    metadata=file_metadata)
            filemeta.set_filemeta(self.item, file_metadata)
            f.seek(0)
            rendition_spec = get_renditions_spec()
            renditions = generate_renditions(f, file_id, [file_id], "image",
                                             content_type, rendition_spec,
                                             url_for_media)
            self.item["renditions"] = renditions
        archive = get_resource_service("archive")
        archive.post([self.item])
Example No. 21
    def _create_image_items(self, image_links, text_item):
        """Create a list of picture items that represent the external images
        located on given URLs.

        Each created item's `firstcreated` and `versioncreated` fields are set
        to the same value as the values of these fields in `text_item`.

        :param iterable image_links: list of image URLs
        :param dict text_item: the "main" text item the images are related to

        :return: list of created image items (as dicts)
        """
        image_items = []

        for image_url in image_links:
            img_item = {
                'guid': generate_guid(type=GUID_TAG),
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'firstcreated': text_item.get('firstcreated'),
                'versioncreated': text_item.get('versioncreated'),
                'renditions': {
                    'baseImage': {
                        'href': image_url
                    }
                }
            }
            image_items.append(img_item)

        return image_items
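
A quick usage sketch against the documented contract, with hypothetical URLs and uuid4 standing in for generate_guid (only the shape of the result matters here):

import uuid

def make_image_items(image_links, text_item):
    # Stand-in for _create_image_items with uuid4 replacing generate_guid.
    return [{
        'guid': uuid.uuid4().hex,
        'type': 'picture',
        'firstcreated': text_item.get('firstcreated'),
        'versioncreated': text_item.get('versioncreated'),
        'renditions': {'baseImage': {'href': url}},
    } for url in image_links]

items = make_image_items(['https://example.com/a.jpg'], {})
print(items[0]['renditions']['baseImage']['href'])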
Example No. 22
    def delete(self, lookup):
        target_id = request.view_args["target_id"]
        archive_service = get_resource_service(ARCHIVE)
        target = archive_service.find_one(req=None, _id=target_id)
        updates = {}

        if target.get("rewrite_of"):
            # remove the rewrite info
            ArchiveSpikeService().update_rewrite(target)

        if not target.get("rewrite_of"):
            # there is nothing to do
            raise SuperdeskApiError.badRequestError(
                _("Only updates can be unlinked!"))

        if target.get("rewrite_of"):
            updates["rewrite_of"] = None

        if target.get("anpa_take_key"):
            updates["anpa_take_key"] = None

        if target.get("rewrite_sequence"):
            updates["rewrite_sequence"] = None

        if target.get("sequence"):
            updates["sequence"] = None

        updates["event_id"] = generate_guid(type=GUID_TAG)

        archive_service.system_update(target_id, updates, target)
        user = get_user(required=True)
        push_notification("item:unlink",
                          item=target_id,
                          user=str(user.get(config.ID_FIELD)))
        app.on_archive_item_updated(updates, target, ITEM_UNLINK)
Example No. 23
def on_duplicate_item(doc):
    """Make sure duplicated item has basic fields populated."""

    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
Example No. 24
    def setUp(self):
        super().setUp()
        dirname = os.path.dirname(os.path.realpath(__file__))
        image_path = os.path.normpath(os.path.join(dirname, 'fixtures', self.filename))
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        self.item = {'guid': guid,
                     'version': 1,
                     '_id': guid,
                     ITEM_TYPE: CONTENT_TYPE.PICTURE,
                     'mimetype': content_type,
                     'versioncreated': datetime.now()
                     }

        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f, filename=self.filename, content_type=content_type, metadata=file_metadata)
            filemeta.set_filemeta(self.item, file_metadata)
            f.seek(0)
            rendition_spec = get_renditions_spec()
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec, url_for_media)
            self.item['renditions'] = renditions
        archive = get_resource_service('archive')
        archive.post([self.item])
Example No. 25
    def test_trans_attributes(self):
        self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
        self.assertEqual(self.items[0].get('subject')[0].get('qcode'), '11016007')

        guid_hash = hashlib.sha1('https://www.example.com//12345'.encode('utf8')).hexdigest()
        guid = generate_guid(type=GUID_TAG, id=guid_hash)
        self.assertEqual(self.items[0].get('guid'), guid)
Example No. 26
    def test_trans_attributes(self):
        self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
        self.assertEqual(self.items[0].get("subject")[0].get("qcode"), "11016007")

        guid_hash = hashlib.sha1("https://www.example.com//12345".encode("utf8")).hexdigest()
        guid = generate_guid(type=GUID_TAG, id=guid_hash)
        self.assertEqual(self.items[0].get("guid"), guid)
Example No. 27
    def _create_image_items(self, image_links, text_item):
        """Create a list of picture items that represent the external images
        located on given URLs.

        Each created item's `firstcreated` and `versioncreated` fields are set
        to the same value as the values of these fields in `text_item`.

        :param iterable image_links: list of image URLs
        :param dict text_item: the "main" text item the images are related to

        :return: list of created image items (as dicts)
        """
        image_items = []

        for image_url in image_links:
            img_item = {
                "guid": generate_guid(type=GUID_TAG),
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                "firstcreated": text_item.get("firstcreated"),
                "versioncreated": text_item.get("versioncreated"),
                "renditions": {"baseImage": {"href": image_url}},
            }
            image_items.append(img_item)

        return image_items
Example No. 28
    def _create_package(self, text_item, image_items):
        """Create a new content package from given content items.

        The package's `main` group contains only the references to given items,
        not the items themselves. In the list of references, the reference to
        the text item precedes the references to image items.

        Package's `firstcreated` and `versioncreated` fields are set to values
        of these fields in `text_item`, and the `headline` is copied as well.

        :param dict text_item: item representing the text content
        :param list image_items: list of items (dicts) representing the images
            related to the text content
        :return: the created content package
        :rtype: dict
        """
        package = {
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            "guid": generate_guid(type=GUID_TAG),
            "firstcreated": text_item["firstcreated"],
            "versioncreated": text_item["versioncreated"],
            "headline": text_item.get("headline", ""),
            "groups": [
                {"id": "root", "role": "grpRole:NEP", "refs": [{"idRef": "main"}]},
                {"id": "main", "role": "main", "refs": []},
            ],
        }

        item_references = package["groups"][1]["refs"]
        item_references.append({"residRef": text_item["guid"]})

        for image in image_items:
            item_references.append({"residRef": image["guid"]})

        return package
Example No. 29
    def on_create(self, docs):
        """Set default metadata"""
        for doc in docs:
            doc['guid'] = generate_guid(type=GUID_NEWSML)
            doc['planning_type'] = 'agenda'
            set_original_creator(doc)
            self._validate_unique_agenda(doc, {})
Example No. 30
    def setUp(self):
        super().setUp()
        dirname = os.path.dirname(os.path.realpath(__file__))
        image_path = os.path.normpath(
            os.path.join(dirname, 'fixtures', self.filename))
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        self.item = {
            'guid': guid,
            'version': 1,
            '_id': guid,
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'mimetype': content_type,
            'versioncreated': datetime.now()
        }

        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(
                f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f,
                                    filename=self.filename,
                                    content_type=content_type,
                                    metadata=file_metadata)
            filemeta.set_filemeta(self.item, file_metadata)
            f.seek(0)
            rendition_spec = get_renditions_spec()
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec,
                                             url_for_media)
            self.item['renditions'] = renditions
        archive = get_resource_service('archive')
        archive.post([self.item])
Example No. 31
def fetch_item(doc, desk_id, stage_id, state=None, target=None):
    dest_doc = dict(doc)

    if target:
        # set target subscriber info
        dest_doc.update(target)

    new_id = generate_guid(type=GUID_TAG)
    if doc.get("guid"):
        dest_doc.setdefault("uri", doc[GUID_FIELD])

    dest_doc[config.ID_FIELD] = new_id
    dest_doc[GUID_FIELD] = new_id
    generate_unique_id_and_name(dest_doc)

    # avoid circular import
    from apps.tasks import send_to

    dest_doc[config.VERSION] = 1
    dest_doc["versioncreated"] = utcnow()
    send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
    dest_doc[ITEM_STATE] = state or CONTENT_STATE.FETCHED

    dest_doc[FAMILY_ID] = doc[config.ID_FIELD]
    dest_doc[INGEST_ID] = doc[config.ID_FIELD]
    dest_doc[ITEM_OPERATION] = ITEM_FETCH

    remove_unwanted(dest_doc)
    set_original_creator(dest_doc)
    return dest_doc
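
The identifier shuffle above is easy to miss: the fetched copy gets a brand-new _id/guid, the original guid survives as uri, and the original _id becomes both family_id and ingest_id. A minimal sketch of just that relabelling, with uuid4 standing in for generate_guid:

import uuid

def relabel_ids(ingest_doc):
    new_id = uuid.uuid4().hex
    fetched = dict(ingest_doc)
    if ingest_doc.get('guid'):
        fetched.setdefault('uri', ingest_doc['guid'])  # original guid kept as uri
    fetched['_id'] = fetched['guid'] = new_id          # fresh identity in the archive
    fetched['family_id'] = fetched['ingest_id'] = ingest_doc['_id']
    return fetched

print(relabel_ids({'_id': 'ingest-1', 'guid': 'urn:newsml:1'}))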
Example No. 32
    def _create_image_items(self, image_links, text_item):
        image_items = []

        for image_url in image_links:
            guid_hash = hashlib.sha1(image_url.encode("utf8")).hexdigest()
            img_item = {
                "guid": generate_guid(
                    type=GUID_TAG,
                    id=text_item.get("guid") + guid_hash + "-image"),
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                "versioncreated": text_item.get("versioncreated"),
                "firstcreated": text_item.get("firstcreated"),
                "headline": text_item.get("headline", ""),
                "renditions": {
                    "baseImage": {
                        "href": image_url
                    }
                },
            }
            image_items.append(img_item)

        return image_items
Example No. 33
    def delete(self, lookup):
        target_id = request.view_args['target_id']
        archive_service = get_resource_service(ARCHIVE)
        target = archive_service.find_one(req=None, _id=target_id)
        updates = {}

        if target.get('rewrite_of'):
            # remove the rewrite info
            ArchiveSpikeService().update_rewrite(target)

        if not target.get('rewrite_of'):
            # there is nothing to do
            raise SuperdeskApiError.badRequestError("Only updates can be unlinked!")

        if target.get('rewrite_of'):
            updates['rewrite_of'] = None

        if target.get('anpa_take_key'):
            updates['anpa_take_key'] = None

        if target.get('rewrite_sequence'):
            updates['rewrite_sequence'] = None

        if target.get('sequence'):
            updates['sequence'] = None

        updates['event_id'] = generate_guid(type=GUID_TAG)

        archive_service.system_update(target_id, updates, target)
        user = get_user(required=True)
        push_notification('item:unlink', item=target_id, user=str(user.get(config.ID_FIELD)))
        app.on_archive_item_updated(updates, target, ITEM_UNLINK)
Example No. 34
    def update(self, id, updates, original):
        original_state = original[ITEM_STATE]
        if not is_workflow_state_transition_valid(ITEM_SPIKE, original_state):
            raise InvalidStateTransitionError()

        user = get_user(required=True)
        item = get_resource_service(ARCHIVE).find_one(req=None, _id=id)
        task = item.get('task', {})

        updates[EXPIRY] = self._get_spike_expiry(desk_id=task.get('desk'),
                                                 stage_id=task.get('stage'))
        updates[REVERT_STATE] = item.get(ITEM_STATE, None)

        if original.get('rewrite_of'):
            updates['rewrite_of'] = None

        if original.get('rewritten_by'):
            updates['rewritten_by'] = None

        if original.get('broadcast'):
            updates['broadcast'] = None

        if original.get('rewrite_sequence'):
            updates['rewrite_sequence'] = None

        # remove any relation with linked items
        updates[ITEM_EVENT_ID] = generate_guid(type=GUID_TAG)

        if original[ITEM_TYPE] == CONTENT_TYPE.COMPOSITE:
            # remove links from items in the package
            package_service = PackageService()
            items = package_service.get_item_refs(original)
            for item in items:
                package_item = get_resource_service(ARCHIVE).find_one(
                    req=None, _id=item[GUID_FIELD])
                if package_item:
                    linked_in_packages = [
                        linked
                        for linked in package_item.get(LINKED_IN_PACKAGES, [])
                        if linked.get(PACKAGE) != original.get(config.ID_FIELD)
                    ]
                    super().system_update(
                        package_item[config.ID_FIELD],
                        {LINKED_IN_PACKAGES: linked_in_packages}, package_item)

            # and remove all the items from the package
            updates['groups'] = []

        item = self.backend.update(self.datasource, id, updates, original)
        push_notification('item:spike',
                          item=str(id),
                          user=str(user.get(config.ID_FIELD)))

        history_updates = dict(updates)
        if original.get('task'):
            history_updates['task'] = original.get('task')
        app.on_archive_item_updated(history_updates, original, ITEM_SPIKE)
        self._removed_refs_from_package(id)
        return item
Example No. 35
    def parse(self, file_path, provider=None):
        try:
            item = {
                ITEM_TYPE: CONTENT_TYPE.PREFORMATTED,
                "guid": generate_guid(type=GUID_TAG),
                "versioncreated": utcnow(),
            }

            with open(file_path, "rb") as f:
                lines = [line for line in f]
            # parse first header line
            m = re.match(b"\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)", lines[0], flags=re.I)
            if m:
                item["original_source"] = m.group(1).decode("latin-1", "replace")
                item["ingest_provider_sequence"] = m.group(2).decode()
                item["priority"] = self.map_priority(m.group(3).decode())
                item["anpa_category"] = [{"qcode": self.map_category(m.group(4).decode())}]
                item["word_count"] = int(m.group(5).decode())

            inHeader = True
            inText = False
            inNote = False
            for line in lines[1:]:
                # STX starts the body of the story
                if line[0:1] == b"\x02":
                    # pick the rest of the line off as the headline
                    item["headline"] = line[1:].decode("latin-1", "replace").rstrip("\r\n")
                    item["body_html"] = ""
                    inText = True
                    inHeader = False
                    continue
                # ETX denotes the end of the story
                if line[0:1] == b"\x03":
                    break
                if inText:
                    if (
                        line.decode("latin-1", "replace").find("The following information is not for publication") != -1
                        or line.decode("latin-1", "replace").find(
                            "The following information is not intended for publication"
                        )
                        != -1
                    ):
                        inNote = True
                        inText = False
                        item["ednote"] = ""
                        continue
                    item["body_html"] += line.decode("latin-1", "replace")
                if inNote:
                    item["ednote"] += line.decode("latin-1", "replace")
                    continue
                if inHeader:
                    if "slugline" not in item:
                        item["slugline"] = ""
                    item["slugline"] += line.decode("latin-1", "replace").rstrip("/\r\n")
                    continue

            return item
        except Exception as ex:
            raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)
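
The first-line regex is the key to the IPTC 7901 framing: SOH (\x01) introduces the source, sequence, priority, category, and word count, while STX (\x02) and ETX (\x03) bracket the body. A small demonstration on a synthetic header line (the field values are made up):

import re

line = b"\x01XYZ0042 4 i 250 XX"  # hypothetical first line of a 7901 file
m = re.match(b"\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)", line, flags=re.I)
assert m
print(m.group(1))  # b'XYZ'  -> original_source
print(m.group(2))  # b'0042' -> ingest_provider_sequence
print(m.group(3))  # b'4'    -> priority (before mapping)
print(m.group(4))  # b'i'    -> category code (before mapping)
print(m.group(5))  # b'250'  -> word_count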
Example No. 36
def ingest_item(item, provider, rule_set=None, routing_scheme=None):
    try:
        item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
        item[FAMILY_ID] = item[superdesk.config.ID_FIELD]
        providers[provider.get('type')].provider = provider

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, STATE_INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        ingest_service = superdesk.get_resource_service('ingest')

        if item.get('ingest_provider_sequence') is None:
            ingest_service.set_ingest_provider_sequence(item, provider)

        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = providers[provider.get('type')].prepare_href(baseImageRend['href'])
                update_renditions(item, href, old_item)

        if old_item:
            # In case we already have the item, preserve the _id
            item[superdesk.config.ID_FIELD] = old_item[superdesk.config.ID_FIELD]
            ingest_service.put_in_mongo(item[superdesk.config.ID_FIELD], item)
        else:
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                logger.error("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)
    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            pass
        return False
    return True
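
get_expiry_date is assumed here to offset the item's creation time by the provider's configured expiry window, falling back to "now" when versioncreated is missing. A plain-datetime sketch of that expiry rule:

from datetime import datetime, timedelta, timezone

def expiry_date(expiry_minutes, versioncreated=None):
    # Assumed behaviour: creation time (or now) plus the expiry window.
    base = versioncreated or datetime.now(timezone.utc)
    return base + timedelta(minutes=expiry_minutes)

created = datetime(2024, 1, 1, 12, 0, tzinfo=timezone.utc)
print(expiry_date(2880, created))  # two days after creation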
Example No. 37
def generate_recurring_events(event):
    generated_events = []
    setRecurringMode(event)

    # Get the recurrence_id, or generate one if it doesn't exist
    recurrence_id = event.get('recurrence_id', generate_guid(type=GUID_NEWSML))

    # compute the difference between start and end in the original event
    time_delta = event['dates']['end'] - event['dates']['start']
    # for all the dates based on the recurring rules:
    for date in itertools.islice(
            generate_recurring_dates(
                start=event['dates']['start'],
                tz=event['dates'].get('tz')
                and pytz.timezone(event['dates']['tz'] or None),
                **event['dates']['recurring_rule']), 0,
            get_max_recurrent_events()
    ):  # set a limit to prevent too many events from being created
        # create event with the new dates
        new_event = copy.deepcopy(event)

        # Remove fields not required by the new events
        for key in list(new_event.keys()):
            if key.startswith('_'):
                new_event.pop(key)
            elif key.startswith('lock_'):
                new_event.pop(key)
        new_event.pop('pubstatus', None)
        new_event.pop('reschedule_from', None)

        new_event['dates']['start'] = date
        new_event['dates']['end'] = date + time_delta
        # set a unique guid
        new_event['guid'] = generate_guid(type=GUID_NEWSML)
        new_event['_id'] = new_event['guid']
        # set the recurrence id
        new_event['recurrence_id'] = recurrence_id

        # set expiry date
        overwrite_event_expiry_date(new_event)
        # set the _planning_schedule
        set_planning_schedule(new_event)
        generated_events.append(new_event)

    return generated_events
Example No. 38
    def test_trans_attributes(self):
        self.assertEqual(self.items[0].get(ITEM_TYPE), CONTENT_TYPE.TEXT)
        self.assertEqual(self.items[0].get('subject')[0].get('qcode'),
                         '11016007')

        guid_hash = hashlib.sha1(
            'https://www.example.com//12345'.encode('utf8')).hexdigest()
        guid = generate_guid(type=GUID_TAG, id=guid_hash)
        self.assertEqual(self.items[0].get('guid'), guid)
Example No. 39
    def parse_file(self, filename, provider):
        """Parse 7901 file by given filename.

        :param filename
        """
        try:
            item = {ITEM_TYPE: CONTENT_TYPE.PREFORMATTED}
            item['guid'] = generate_guid(type=GUID_TAG)
            item['versioncreated'] = utcnow()

            with open(filename, 'rb') as f:
                lines = [line for line in f]
            # parse first header line
            m = re.match(b'\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)', lines[0], flags=re.I)
            if m:
                item['original_source'] = m.group(1).decode('latin-1', 'replace')
                item['ingest_provider_sequence'] = m.group(2).decode()
                item['priority'] = self.map_priority(m.group(3).decode())
                item['anpa_category'] = [{'qcode': self.map_category(m.group(4).decode())}]
                item['word_count'] = int(m.group(5).decode())

            inHeader = True
            inText = False
            inNote = False
            for line in lines[1:]:
                # STX starts the body of the story
                if line[0:1] == b'\x02':
                    # pick the rest of the line off as the headline
                    item['headline'] = line[1:].decode('latin-1', 'replace').rstrip('\r\n')
                    item['body_html'] = ''
                    inText = True
                    inHeader = False
                    continue
                # ETX denotes the end of the story
                if line[0:1] == b'\x03':
                    break
                if inText:
                    if line.decode('latin-1', 'replace').find('The following information is not for publication') != -1 \
                            or line.decode('latin-1', 'replace').find(
                                'The following information is not intended for publication') != -1:
                        inNote = True
                        inText = False
                        item['ednote'] = ''
                        continue
                    item['body_html'] += line.decode('latin-1', 'replace')
                if inNote:
                    item['ednote'] += line.decode('latin-1', 'replace')
                    continue
                if inHeader:
                    if 'slugline' not in item:
                        item['slugline'] = ''
                    item['slugline'] += line.decode('latin-1', 'replace').rstrip('/\r\n')
                    continue

            return item
        except Exception as ex:
            raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)
Example No. 40
    def on_create(self, docs):
        """Set default metadata."""

        for doc in docs:
            if 'guid' not in doc:
                doc['guid'] = generate_guid(type=GUID_NEWSML)
            doc[config.ID_FIELD] = doc['guid']
            set_original_creator(doc)
            self._set_planning_event_date(doc)
Example No. 41
def on_duplicate_item(doc):
    """Make sure duplicated item has basic fields populated."""

    doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)
    generate_unique_id_and_name(doc)
    doc.setdefault('_id', doc[GUID_FIELD])
    set_sign_off(doc)
    doc['force_unlock'] = True
    doc[ITEM_OPERATION] = ITEM_DUPLICATE
Example No. 42
def on_create_item(docs, repo_type=ARCHIVE):
    """Make sure item has basic fields populated."""

    for doc in docs:
        update_dates_for(doc)
        set_original_creator(doc)

        if not doc.get(GUID_FIELD):
            doc[GUID_FIELD] = generate_guid(type=GUID_NEWSML)

        if 'unique_id' not in doc:
            generate_unique_id_and_name(doc, repo_type)

        if 'family_id' not in doc:
            doc['family_id'] = doc[GUID_FIELD]

        if 'event_id' not in doc and repo_type != 'ingest':
            doc['event_id'] = generate_guid(type=GUID_TAG)

        set_default_state(doc, CONTENT_STATE.DRAFT)
        doc.setdefault(config.ID_FIELD, doc[GUID_FIELD])

        if repo_type == ARCHIVE:
            # set the source for the article
            set_default_source(doc)

        if 'profile' not in doc and app.config.get('DEFAULT_CONTENT_TYPE', None):
            doc['profile'] = app.config.get('DEFAULT_CONTENT_TYPE', None)

        copy_metadata_from_profile(doc)
        copy_metadata_from_user_preferences(doc, repo_type)

        if 'language' not in doc:
            doc['language'] = app.config.get('DEFAULT_LANGUAGE', 'en')

            if doc.get('task', None) and doc['task'].get('desk', None):
                desk = superdesk.get_resource_service('desks').find_one(req=None, _id=doc['task']['desk'])
                if desk and desk.get('desk_language', None):
                    doc['language'] = desk['desk_language']

        if not doc.get(ITEM_OPERATION):
            doc[ITEM_OPERATION] = ITEM_CREATE
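
The language fallback above resolves in order: an explicit doc['language'] wins, then the desk's desk_language, then DEFAULT_LANGUAGE. The same precedence as a stand-alone sketch, with plain dicts instead of the desks service:

def resolve_language(doc, desk, default='en'):
    # Precedence mirrors on_create_item: document > desk > configured default.
    if doc.get('language'):
        return doc['language']
    if desk and desk.get('desk_language'):
        return desk['desk_language']
    return default

print(resolve_language({}, {'desk_language': 'fr'}))                  # fr
print(resolve_language({'language': 'de'}, {'desk_language': 'fr'}))  # de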
Example No. 43
    def create(self, docs, **kwargs):
        search_provider = get_resource_service('search_providers').find_one(
            search_provider=PROVIDER_NAME, req=None)

        if not search_provider or search_provider.get('is_closed', False):
            raise SuperdeskApiError.badRequestError(
                'No search provider found or the search provider is closed.')

        if 'config' in search_provider:
            self.backend.set_credentials(search_provider['config'])

        new_guids = []
        for doc in docs:
            if not doc.get('desk'):
                # if no desk is selected then it is a bad request
                raise SuperdeskApiError.badRequestError(
                    "Destination desk cannot be empty.")

            try:
                archived_doc = self.backend.find_one_raw(
                    doc['guid'], doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, search_provider)

            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            if search_provider:
                dest_doc['ingest_provider'] = str(
                    search_provider[config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc,
                    update=None,
                    desk_id=doc.get('desk'),
                    stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
            dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc[config.ID_FIELD])

            get_resource_service('search_providers').system_update(
                search_provider[config.ID_FIELD],
                {'last_item_update': utcnow()}, search_provider)

        return new_guids
Example No. 44
    def parse(self, file_path, provider=None):
        try:
            item = {ITEM_TYPE: CONTENT_TYPE.TEXT, 'guid': generate_guid(type=GUID_TAG),
                    'versioncreated': utcnow()}

            with open(file_path, 'rb') as f:
                lines = [line for line in f]
            # parse first header line
            m = re.match(b'\x01([a-zA-Z]*)([0-9]*) (.) (.) ([0-9]*) ([a-zA-Z0-9 ]*)', lines[0], flags=re.I)
            if m:
                item['original_source'] = m.group(1).decode('latin-1', 'replace')
                item['ingest_provider_sequence'] = m.group(2).decode()
                item['priority'] = self.map_priority(m.group(3).decode())
                item['anpa_category'] = [{'qcode': self.map_category(m.group(4).decode())}]
                item['word_count'] = int(m.group(5).decode())

            inHeader = True
            inText = False
            inNote = False
            for line in lines[1:]:
                # STX starts the body of the story
                if line[0:1] == b'\x02':
                    # pick the rest of the line off as the headline
                    item['headline'] = line[1:].decode('latin-1', 'replace').rstrip('\r\n')
                    item['body_html'] = ''
                    inText = True
                    inHeader = False
                    continue
                # ETX denotes the end of the story
                if line[0:1] == b'\x03':
                    break
                if inText:
                    if line.decode('latin-1', 'replace')\
                            .find('The following information is not for publication') != -1 \
                            or line.decode('latin-1', 'replace').find(
                                'The following information is not intended for publication') != -1:
                        inNote = True
                        inText = False
                        item['ednote'] = ''
                        continue
                    item['body_html'] += line.decode('latin-1', 'replace')
                if inNote:
                    item['ednote'] += line.decode('latin-1', 'replace')
                    continue
                if inHeader:
                    if 'slugline' not in item:
                        item['slugline'] = ''
                    item['slugline'] += line.decode('latin-1', 'replace').rstrip('/\r\n')
                    continue

            return item
        except Exception as ex:
            raise ParserError.IPTC7901ParserError(exception=ex, provider=provider)
Example No. 45
    def fetch(self, docs, id=None, **kwargs):
        id_of_fetched_items = []

        for doc in docs:
            id_of_item_to_be_fetched = doc.get(config.ID_FIELD) if id is None else id

            desk_id = doc.get('desk')
            stage_id = doc.get('stage')

            ingest_service = get_resource_service('ingest')
            ingest_doc = ingest_service.find_one(req=None, _id=id_of_item_to_be_fetched)

            if not ingest_doc:
                raise SuperdeskApiError.notFoundError('Failed to find ingest item with _id: %s' %
                                                      id_of_item_to_be_fetched)

            if not is_workflow_state_transition_valid('fetch_from_ingest', ingest_doc[ITEM_STATE]):
                raise InvalidStateTransitionError()

            if doc.get('macro'):  # there is a macro so transform it
                ingest_doc = get_resource_service('macros').execute_macro(ingest_doc, doc.get('macro'))

            archived = utcnow()
            ingest_service.patch(id_of_item_to_be_fetched, {'archived': archived})

            dest_doc = dict(ingest_doc)
            new_id = generate_guid(type=GUID_TAG)
            id_of_fetched_items.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            dest_doc[GUID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH

            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)
            self.__fetch_items_in_package(dest_doc, desk_id, stage_id,
                                          doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

            get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(doc=dest_doc)
            build_custom_hateoas(custom_hateoas, dest_doc)
            doc.update(dest_doc)

        if kwargs.get('notify', True):
            push_notification('item:fetch', fetched=1)

        return id_of_fetched_items
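
For context, a hedged sketch of the payload this fetch service consumes: each doc names the ingest item plus the destination desk/stage, and optionally a macro to transform it. Field names follow the doc.get(...) calls above; all values and the service variable are placeholders.

# Hypothetical fetch request (values invented):
docs = [{
    '_id': 'ingest-item-id',       # ingest item to fetch
    'desk': 'sports-desk-id',      # destination desk
    'stage': 'incoming-stage-id',  # destination stage
    'macro': 'usd_to_cad',         # optional transform macro
}]
new_ids = service.fetch(docs)      # ids of the newly archived items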
    def _parse(self, xml):
        """
        Parse an XML document and return a list of events.

        :param xml: xml document
        :type xml: lxml.etree._Element
        :return: a list of events
        """

        items = []
        documents = []

        # http events api xml
        if xml.tag == self.SUPPORTED_ROOT_TAGS.RESULT:
            documents = xml.xpath('./document')
        # ftp events xml
        elif xml.tag == self.SUPPORTED_ROOT_TAGS.DOCUMENT:
            documents = [xml]

        self._prefetch_contacts()
        self._prefetch_vocabularies_items()

        for document in documents:
            item = {
                ITEM_TYPE: CONTENT_TYPE.EVENT,
                FORMAT: FORMATS.PRESERVED,
                GUID_FIELD: generate_guid(type=GUID_NEWSML),
                'firstcreated': utcnow(),
                'versioncreated': utcnow()
            }
            self._fill_ntb_id(document, item)
            self._fill_name(document, item)
            self._fill_dates(document, item)
            if 'start' not in item['dates'] or 'end' not in item['dates']:
                # explicitly ignore items without start/end dates
                continue
            self._fill_definition_short(document, item)
            self._fill_priority(document, item)
            self._fill_category(document, item)
            self._fill_calendars(item)
            self._fill_anpa_category(document, item)
            self._fill_location(document, item)
            self._fill_subject(document, item)
            self._fill_slugline(item)
            self._fill_occur_status(item)
            self._fill_internal_note(document, item)
            self._fill_links(document, item)
            self._fill_event_contact_info(document, item)

            items.append(item)

        return items
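
The root-tag dispatch at the top of _parse in miniature, assuming (as the comments suggest) that SUPPORTED_ROOT_TAGS.RESULT and SUPPORTED_ROOT_TAGS.DOCUMENT name the result and document tags:

from lxml import etree

# HTTP events API responses wrap many <document> elements in a <result>;
# FTP deliveries have a single <document> as the root.
xml = etree.fromstring('<result><document/><document/></result>')
documents = xml.xpath('./document') if xml.tag == 'result' else [xml]
print(len(documents))  # 2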
    def _duplicate_planning(self, original):
        new_plan = deepcopy(original)

        for f in ('_id', 'guid', 'lock_user', 'lock_time', 'original_creator',
                  '_coverages',
                  'lock_session', 'lock_action', '_created', '_updated',
                  '_etag', 'pubstatus'):
            new_plan.pop(f, None)

        new_plan[ITEM_STATE] = WORKFLOW_STATE.IN_PROGRESS
        new_plan['guid'] = generate_guid(type=GUID_NEWSML)

        return new_plan
Example #48
    def parse_item(self, image_path):
        filename = os.path.basename(image_path)
        content_type = mimetypes.guess_type(image_path)[0]
        guid = utils.generate_guid(type=GUID_TAG)
        item = {'guid': guid,
                config.VERSION: 1,
                config.ID_FIELD: guid,
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'mimetype': content_type,
                'versioncreated': utcnow()  # aware UTC time for consistency (assumes superdesk's utcnow is imported)
                }
        with open(image_path, 'rb') as f:
            _, content_type, file_metadata = process_file_from_stream(f, content_type=content_type)
            f.seek(0)
            file_id = app.media.put(f, filename=filename, content_type=content_type, metadata=file_metadata)
            filemeta.set_filemeta(item, file_metadata)
            f.seek(0)
            metadata = get_meta_iptc(f)
            f.seek(0)
            rendition_spec = get_renditions_spec(no_custom_crops=True)
            renditions = generate_renditions(f, file_id, [file_id], 'image',
                                             content_type, rendition_spec, url_for_media)
            item['renditions'] = renditions

        try:
            date_created, time_created = metadata[TAG.DATE_CREATED], metadata[TAG.TIME_CREATED]
        except KeyError:
            pass
        else:
            # we format proper ISO 8601 date so we can parse it with dateutil
            datetime_created = '{}-{}-{}T{}:{}:{}{}{}:{}'.format(date_created[0:4],
                                                                 date_created[4:6],
                                                                 date_created[6:8],
                                                                 time_created[0:2],
                                                                 time_created[2:4],
                                                                 time_created[4:6],
                                                                 time_created[6],
                                                                 time_created[7:9],
                                                                 time_created[9:])
            item['firstcreated'] = dateutil.parser.parse(datetime_created)

        # now we map IPTC metadata to superdesk metadata
        for source_key, dest_key in IPTC_MAPPING.items():
            try:
                item[dest_key] = metadata[source_key]
            except KeyError:
                continue
        return item
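
The string surgery on the IPTC date/time tags is easier to follow with concrete values: DateCreated is CCYYMMDD and TimeCreated is HHMMSS followed by a +/-HHMM zone offset, so the slices rebuild an ISO 8601 timestamp. Sample values below are invented:

import dateutil.parser

date_created, time_created = '20230115', '143000+0100'  # hypothetical tag values

datetime_created = '{}-{}-{}T{}:{}:{}{}{}:{}'.format(
    date_created[0:4], date_created[4:6], date_created[6:8],  # 2023-01-15
    time_created[0:2], time_created[2:4], time_created[4:6],  # 14:30:00
    time_created[6], time_created[7:9], time_created[9:])     # +01:00

print(datetime_created)                         # 2023-01-15T14:30:00+01:00
print(dateutil.parser.parse(datetime_created))  # 2023-01-15 14:30:00+01:00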
Example #49
    def _parse_association(self, association):
        """Parses a BBC ninjs association
        :param association:
        :return: association dict
        """
        key = association.pop('id')
        # BBC don't use 'featuremedia', they typically use 'featureimage'
        if re.match('^feature', key):
            key = 'featuremedia'

        parsed = deepcopy(association)

        parsed[ITEM_TYPE] = CONTENT_TYPE.PICTURE
        url = association['renditions']['original']['href']
        guid_hash = hashlib.sha1(url.encode('utf8')).hexdigest()
        parsed['guid'] = generate_guid(type=GUID_TAG, id=guid_hash)

        return key, parsed
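
A usage sketch with a made-up payload shaped like the BBC ninjs variant; parser stands in for an instance of the feed parser above:

association = {
    'id': 'featureimage',  # BBC's key; anything matching ^feature is normalised
    'renditions': {'original': {'href': 'https://example.com/pic.jpg'}},
}
key, parsed = parser._parse_association(association)
# key == 'featuremedia'; parsed['guid'] is derived from the SHA-1 of the
# rendition URL, so re-ingesting the same picture yields the same guid.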
Example #50
    def create(self, docs, **kwargs):
        search_provider = get_resource_service('search_providers').find_one(search_provider=PROVIDER_NAME, req=None)

        if not search_provider or search_provider.get('is_closed', False):
            raise SuperdeskApiError.badRequestError('No search provider found or the search provider is closed.')

        if 'config' in search_provider:
            self.backend.set_credentials(search_provider['config'])

        new_guids = []
        for doc in docs:
            if not doc.get('desk'):  # if no desk is selected then it is bad request
                raise SuperdeskApiError.badRequestError("Destination desk cannot be empty.")

            try:
                archived_doc = self.backend.find_one_raw(doc['guid'], doc['guid'])
            except FileNotFoundError as ex:
                raise ProviderError.externalProviderError(ex, search_provider)

            dest_doc = dict(archived_doc)
            new_id = generate_guid(type=GUID_TAG)
            new_guids.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            if search_provider:
                dest_doc['ingest_provider'] = str(search_provider[config.ID_FIELD])

            dest_doc[config.VERSION] = 1
            send_to(doc=dest_doc, update=None, desk_id=doc.get('desk'), stage_id=doc.get('stage'))
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = archived_doc[config.ID_FIELD]
            dest_doc[FAMILY_ID] = archived_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH
            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)

            superdesk.get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(dest_doc[config.ID_FIELD])

            get_resource_service('search_providers').system_update(search_provider[config.ID_FIELD],
                                                                   {'last_item_update': utcnow()}, search_provider)

        return new_guids
Example #51
    def _create_package(self, text_item, image_items):
        """Create a new content package from given content items.

        The package's `main` group contains only the references to given items,
        not the items themselves. In the list of references, the reference to
        the text item precedes the references to image items.

        Package's `firstcreated` and `versioncreated` fields are set to values
        of these fields in `text_item`, and the `headline` is copied as well.

        :param dict text_item: item representing the text content
        :param list image_items: list of items (dicts) representing the images
            related to the text content
        :return: the created content package
        :rtype: dict
        """
        package = {
            ITEM_TYPE: CONTENT_TYPE.COMPOSITE,
            'guid': generate_guid(type=GUID_TAG),
            'firstcreated': text_item['firstcreated'],
            'versioncreated': text_item['versioncreated'],
            'headline': text_item.get('headline', ''),
            'groups': [
                {
                    'id': 'root',
                    'role': 'grpRole:NEP',
                    'refs': [{'idRef': 'main'}],
                }, {
                    'id': 'main',
                    'role': 'main',
                    'refs': [],
                }
            ]
        }

        item_references = package['groups'][1]['refs']
        item_references.append({'residRef': text_item['guid']})

        for image in image_items:
            item_references.append({'residRef': image['guid']})

        return package
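
A hedged usage sketch with minimal stand-in items; only the fields the method actually reads are populated, and service is an instance of the class above:

text_item = {'guid': 'text-1',                            # invented values
             'firstcreated': '2023-01-15T10:00:00+0000',
             'versioncreated': '2023-01-15T10:05:00+0000',
             'headline': 'Storm hits coast'}
image_items = [{'guid': 'img-1'}, {'guid': 'img-2'}]

package = service._create_package(text_item, image_items)
print(package['groups'][1]['refs'])
# [{'residRef': 'text-1'}, {'residRef': 'img-1'}, {'residRef': 'img-2'}]
# -- the text reference always precedes the image references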
Example #52
    def ingest_items_for(self, desk, no_of_stories, skip_index):
        desk_id = desk['_id']
        stage_id = desk['incoming_stage']

        bucket_size = min(100, no_of_stories)

        no_of_buckets = len(range(0, no_of_stories, bucket_size))

        for x in range(0, no_of_buckets):
            skip = x * bucket_size * skip_index
            logger.info('Page : {}, skip: {}'.format(x + 1, skip))
            cursor = get_resource_service('published').get_from_mongo(None, {})
            cursor.skip(skip)
            cursor.limit(bucket_size)
            items = list(cursor)
            logger.info('Inserting {} items'.format(len(items)))
            archive_items = []

            for item in items:
                dest_doc = dict(item)
                new_id = generate_guid(type=GUID_TAG)
                dest_doc[app.config['ID_FIELD']] = new_id
                dest_doc['guid'] = new_id
                generate_unique_id_and_name(dest_doc)

                dest_doc[app.config['VERSION']] = 1
                dest_doc[ITEM_STATE] = CONTENT_STATE.FETCHED
                members = desk.get('members') or [{'user': None}]  # guard against an empty members list
                user_id = members[0].get('user')
                dest_doc['original_creator'] = user_id
                dest_doc['version_creator'] = user_id

                from apps.tasks import send_to
                send_to(dest_doc, desk_id=desk_id, stage_id=stage_id, user_id=user_id)
                dest_doc[app.config['VERSION']] = 1  # Above step increments the version and needs to reset
                dest_doc[FAMILY_ID] = item['_id']

                remove_unwanted(dest_doc)
                archive_items.append(dest_doc)

            get_resource_service(ARCHIVE).post(archive_items)
            for item in archive_items:
                insert_into_versions(id_=item[app.config['ID_FIELD']])
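
The len(range(...)) expression above is just a ceiling division in disguise; for example, 250 stories in buckets of 100 yield three pages:

import math

no_of_stories, bucket_size = 250, 100
assert len(range(0, no_of_stories, bucket_size)) == math.ceil(no_of_stories / bucket_size) == 3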
Example #53
    def _create_image_items(self, image_links, text_item):
        image_items = []

        for image_url in image_links:
            guid_hash = hashlib.sha1(image_url.encode('utf8')).hexdigest()
            img_item = {
                'guid': generate_guid(type=GUID_TAG,
                                      id=text_item.get('guid') +
                                      guid_hash + '-image'),
                ITEM_TYPE: CONTENT_TYPE.PICTURE,
                'versioncreated': text_item.get('versioncreated'),
                'firstcreated': text_item.get('firstcreated'),
                'headline': text_item.get('headline', ''),
                'renditions': {
                    'baseImage': {
                        'href': image_url
                    }
                }
            }
            image_items.append(img_item)

        return image_items
Example #54
    def _parse_main(self, json):
        """Parses the main body of text and metadata
        :param json:
        :return: dict of article metadata and body
        """
        # No GUID is included so generate one from the link
        main = {}

        guid_hash = hashlib.sha1(json['uri'].encode('utf8')).hexdigest()
        main['guid'] = generate_guid(type=GUID_TAG, id=guid_hash)
        # Copy over all attributes which are the same as Superdesk's ninjs variant
        for copy_property in self.direct_copy_properties:
            if json.get(copy_property) is not None:
                main[copy_property] = json[copy_property]

        main['versioncreated'] = self._parse_date(json['versioncreated'])
        main['firstcreated'] = self._parse_date(json['firstcreated'])

        if json.get('embargotime'):
            main['embargo'] = json['embargotime']

        main['type'] = self._convert_type(json['type'])
        return main
Example #55
    def _create_image(self, association, main):
        """Builds an image item from an association
        :param association: The raw association in BBC's ninjs variant
        :param main: The main article body
        :return: An image item dict
        """
        url = association['renditions']['original']['href']
        guid_hash = hashlib.sha1(url.encode('utf8')).hexdigest()

        item = {
            'guid': generate_guid(type=GUID_TAG, id=guid_hash + '-image'),
            ITEM_TYPE: CONTENT_TYPE.PICTURE,
            'versioncreated': main['versioncreated'],
            'firstcreated': main['firstcreated'],
            'headline': association.get('headline', ''),
            'description_text': association.get('description_text', ''),
            'renditions': {
                'baseImage': {
                    'href': url
                }
            }
        }

        return item
Example #56
def ingest_item(item, provider, feeding_service, rule_set=None, routing_scheme=None):
    try:
        ingest_service = superdesk.get_resource_service('ingest')

        # determine if we already have this item
        old_item = ingest_service.find_one(guid=item[GUID_FIELD], req=None)

        if not old_item:
            item.setdefault(superdesk.config.ID_FIELD, generate_guid(type=GUID_NEWSML))
            item[FAMILY_ID] = item[superdesk.config.ID_FIELD]

        item['ingest_provider'] = str(provider[superdesk.config.ID_FIELD])
        item.setdefault('source', provider.get('source', ''))
        set_default_state(item, CONTENT_STATE.INGESTED)
        item['expiry'] = get_expiry_date(provider.get('content_expiry', app.config['INGEST_EXPIRY_MINUTES']),
                                         item.get('versioncreated'))

        if 'anpa_category' in item:
            process_anpa_category(item, provider)

        if 'subject' in item:
            process_iptc_codes(item, provider)
            if 'anpa_category' not in item:
                derive_category(item, provider)
        elif 'anpa_category' in item:
            derive_subject(item)

        apply_rule_set(item, provider, rule_set)

        if item.get('pubstatus', '') == 'canceled':
            item[ITEM_STATE] = CONTENT_STATE.KILLED
            ingest_cancel(item)

        rend = item.get('renditions', {})
        if rend:
            baseImageRend = rend.get('baseImage') or next(iter(rend.values()))
            if baseImageRend:
                href = feeding_service.prepare_href(baseImageRend['href'], rend.get('mimetype'))
                update_renditions(item, href, old_item)

        new_version = True
        if old_item:
            updates = deepcopy(item)
            ingest_service.patch_in_mongo(old_item[superdesk.config.ID_FIELD], updates, old_item)
            item.update(old_item)
            item.update(updates)
            # if the feed is versioned and this is not a new version
            if 'version' in item and 'version' in old_item and item.get('version') == old_item.get('version'):
                new_version = False
        else:
            if item.get('ingest_provider_sequence') is None:
                ingest_service.set_ingest_provider_sequence(item, provider)
            try:
                ingest_service.post_in_mongo([item])
            except HTTPException as e:
                logger.exception("Exception while persisting item in ingest collection: %s", e)

        if routing_scheme and new_version:
            routed = ingest_service.find_one(_id=item[superdesk.config.ID_FIELD], req=None)
            superdesk.get_resource_service('routing_schemes').apply_routing_scheme(routed, provider, routing_scheme)

    except Exception as ex:
        logger.exception(ex)
        try:
            superdesk.app.sentry.captureException()
        except Exception:
            pass
        return False
    return True
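
The versioned-feed check near the end of the update branch is worth isolating: an incoming item whose version equals the stored item's version is treated as a re-delivery rather than a new version, so the routing scheme is skipped. In miniature:

old_item = {'_id': 'x', 'version': 3}  # what ingest already holds
item = {'_id': 'x', 'version': 3}      # what the feed just delivered

new_version = not ('version' in item and 'version' in old_item
                   and item.get('version') == old_item.get('version'))
assert new_version is False  # same version -> routing scheme not applied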
Example #57
    def fetch(self, docs, id=None, **kwargs):
        id_of_fetched_items = []

        for doc in docs:
            id_of_item_to_be_fetched = doc.get(config.ID_FIELD) if id is None else id

            desk_id = doc.get("desk")
            stage_id = doc.get("stage")

            ingest_service = get_resource_service("ingest")
            ingest_doc = ingest_service.find_one(req=None, _id=id_of_item_to_be_fetched)

            if not ingest_doc:
                raise SuperdeskApiError.notFoundError(
                    "Failed to find ingest item with _id: %s" % id_of_item_to_be_fetched
                )

            if not is_workflow_state_transition_valid("fetch_from_ingest", ingest_doc[ITEM_STATE]):
                raise InvalidStateTransitionError()

            if doc.get("macro"):  # there is a macro so transform it
                ingest_doc = get_resource_service("macros").execute_macro(ingest_doc, doc.get("macro"))

            archived = utcnow()
            ingest_service.patch(id_of_item_to_be_fetched, {"archived": archived})

            dest_doc = dict(ingest_doc)

            if doc.get("target"):
                dest_doc.update(doc.get("target"))

            new_id = generate_guid(type=GUID_TAG)
            id_of_fetched_items.append(new_id)
            dest_doc[config.ID_FIELD] = new_id
            dest_doc[GUID_FIELD] = new_id
            generate_unique_id_and_name(dest_doc)

            dest_doc[config.VERSION] = 1
            dest_doc["versioncreated"] = archived
            send_to(doc=dest_doc, desk_id=desk_id, stage_id=stage_id)
            dest_doc[ITEM_STATE] = doc.get(ITEM_STATE, CONTENT_STATE.FETCHED)
            dest_doc[INGEST_ID] = dest_doc[FAMILY_ID] = ingest_doc[config.ID_FIELD]
            dest_doc[ITEM_OPERATION] = ITEM_FETCH

            remove_unwanted(dest_doc)
            set_original_creator(dest_doc)
            self.__fetch_items_in_package(dest_doc, desk_id, stage_id, doc.get(ITEM_STATE, CONTENT_STATE.FETCHED))

            desk = get_resource_service("desks").find_one(req=None, _id=desk_id)
            if desk and desk.get("default_content_profile"):
                dest_doc["profile"] = desk["default_content_profile"]

            if dest_doc.get("type", "text") in MEDIA_TYPES:
                dest_doc["profile"] = None

            get_resource_service(ARCHIVE).post([dest_doc])
            insert_into_versions(doc=dest_doc)
            build_custom_hateoas(custom_hateoas, dest_doc)
            doc.update(dest_doc)

        if kwargs.get("notify", True):
            ingest_doc.update({"task": dest_doc.get("task")})
            push_item_move_notification(ingest_doc, doc, "item:fetch")

        return id_of_fetched_items