Пример #1
0
 def testTagMedia(self):
     """Tag a media source, confirm the tag is listed on it, then remove the tag again."""
     media_id = 4451  # ESPN.com
     # id of 'test_tag1' (presumably inside the test user's tag set - confirm)
     expected_tag_id = '9172171'
     tag_set_name = TEST_USER_EMAIL

     def apply_tag(action):
         # send a one-tag request and require the API to report success
         tag_spec = MediaTag(media_id, tag_set_name, 'test_tag1', action)
         response = self._mc.tagMedia([tag_spec])
         self.assertTrue('success' in response)
         self.assertEqual(response['success'], 1)

     def tag_ids_in_test_set():
         # re-fetch the media source and keep only the tags from our tag set
         media = self._mc.media(media_id)
         found = []
         for tag in media['media_source_tags']:
             if tag['tag_set'] == tag_set_name:
                 found.append(tag['tags_id'])
         return found

     # add a tag and check it shows up
     apply_tag(TAG_ACTION_ADD)
     self.assertTrue(int(expected_tag_id) in tag_ids_in_test_set())
     # remove it and check nothing from the tag set is left
     apply_tag(TAG_ACTION_REMOVE)
     self.assertEqual(0, len(tag_ids_in_test_set()))
Пример #2
0
def remove_sources_from_collection(collection_id):
    """Remove the media sources named in the ``sources[]`` form field from a collection.

    The form field is a comma-separated list of media ids. Each listed id is
    sent with TAG_ACTION_REMOVE; every other source currently in the collection
    is re-sent with TAG_ACTION_ADD, all in one ``tagMedia`` request.

    :param collection_id: tags_id of the collection to modify
    :return: JSON response wrapping the ``tagMedia`` result, or ``{}`` when
        there was nothing to send
    """
    # Skip blank pieces: ''.split(',') yields [''], and int('') raises
    # ValueError, so an empty field (or a stray comma) would otherwise crash.
    source_ids_to_remove = [
        int(s) for s in request.form['sources[]'].split(',') if s.strip()
    ]
    user_mc = user_admin_mediacloud_client()
    # get the sources in the collection first, then remove and add as needed
    existing_source_ids = [
        int(m['media_id']) for m in media_with_tag(collection_id)
    ]
    source_ids_to_remain = list(
        set(existing_source_ids) - set(source_ids_to_remove))

    media_to_remove = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE)
        for sid in source_ids_to_remove
    ]
    # NOTE(review): the original author was unsure whether re-adding the
    # remaining sources is needed at all; kept as-is to preserve behavior.
    media_to_remain = [
        MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD)
        for sid in source_ids_to_remain
    ]
    current_media = media_to_remove + media_to_remain

    results = {}
    if current_media:
        results = user_mc.tagMedia(current_media)

    apicache.invalidate_collection_source_representation_cache(
        user_mediacloud_key(), collection_id)
    return jsonify(results)
Пример #3
0
def collection_update(collection_id):
    """Update a collection's properties and sync its sources to the submitted list.

    Reads ``name``, ``description`` and ``sources[]`` (comma-separated media
    ids, may be empty) from the request form, plus the optional ``static``,
    ``showOnStories`` and ``showOnMedia`` flags, which count as true only when
    they equal the string ``'true'``.

    :param collection_id: tags_id of the collection to update
    :return: JSON response with the ``'tag'`` entry of the ``updateTag`` result
    """
    user_mc = user_admin_mediacloud_client()
    label = '{}'.format(request.form['name'])
    description = request.form['description']
    # optional flags: None when the field was not submitted
    static = request.form.get('static')
    show_on_stories = request.form.get('showOnStories')
    show_on_media = request.form.get('showOnMedia')

    formatted_name = format_name_from_label(label)

    source_ids = []
    if len(request.form['sources[]']) > 0:
        source_ids = [int(sid) for sid in request.form['sources[]'].split(',')]
    # first update the collection
    updated_collection = user_mc.updateTag(collection_id, formatted_name, label, description,
                                           is_static=(static == 'true'),
                                           show_on_stories=(show_on_stories == 'true'),
                                           show_on_media=(show_on_media == 'true'))
    # get the sources in the collection first, then remove and add as needed;
    # a set keeps each membership test below O(1) instead of scanning a list
    existing_source_ids = set(
        int(m['media_id']) for m in media_with_tag(user_mediacloud_key(), collection_id))
    source_ids_to_remove = existing_source_ids - set(source_ids)
    source_ids_to_add = [sid for sid in source_ids if sid not in existing_source_ids]
    # then go through and tag all the sources specified with the new collection id
    tags_to_add = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_ADD) for sid in source_ids_to_add]
    tags_to_remove = [MediaTag(sid, tags_id=collection_id, action=TAG_ACTION_REMOVE) for sid in source_ids_to_remove]
    tags = tags_to_add + tags_to_remove
    if tags:  # don't make an empty request
        user_mc.tagMedia(tags)
    return jsonify(updated_collection['tag'])
Пример #4
0
def collection_create():
    """Create a collection tag from the request form and tag the listed sources with it.

    Form fields read: ``name``, ``description``, ``sources[]`` (comma-separated
    ids, may be empty) and the optional ``static`` / ``showOnStories`` /
    ``showOnMedia`` flags, each treated as true only when equal to ``'true'``.

    :return: JSON response with the ``'tag'`` entry of the ``createTag`` result
    """
    # has to be admin to call createTag
    admin_mc = user_admin_mediacloud_client()
    form = request.form
    label = '{}'.format(form['name'])
    description = form['description']
    # optional flags come back as None when absent from the form
    static_flag = form.get('static')
    stories_flag = form.get('showOnStories')
    media_flag = form.get('showOnMedia')
    raw_sources = form['sources[]']
    source_ids = raw_sources.split(',') if len(raw_sources) > 0 else []

    formatted_name = format_name_from_label(label)
    # first create the collection
    flags = {
        'is_static': static_flag == 'true',
        'show_on_stories': stories_flag == 'true',
        'show_on_media': media_flag == 'true',
    }
    new_collection = admin_mc.createTag(TAG_SETS_ID_COLLECTIONS, formatted_name,
                                        label, description, **flags)
    created_tag = new_collection['tag']
    # then tag every listed source with the new collection id (skip the call if none)
    if source_ids:
        new_tags = []
        for sid in source_ids:
            new_tags.append(MediaTag(sid, tags_id=created_tag['tags_id'],
                                     action=TAG_ACTION_ADD))
        admin_mc.tagMedia(new_tags)
    return jsonify(created_tag)
Пример #5
0
def update_metadata_for_sources(source_list):
    """Queue add-tag jobs for the metadata values found on each source.

    For every entry of VALID_METADATA_IDS (each a single-item dict mapping a
    source key to a tag set id), look up the tag whose name matches the value
    the source holds under that key and collect a ``MediaTag`` add for it. The
    collected tags are then handed to ``_tag_media_job.map`` in chunks of 50.

    :param source_list: iterable of dict-like sources; each needs ``'media_id'``
        and may carry any of the metadata keys
    """
    tags = []
    for m in VALID_METADATA_IDS:
        mkey, mid = list(m.items())[0]
        # Index the tag set by tag name once, rather than re-scanning it for
        # every source. setdefault keeps the first tag when a name repeats,
        # matching the previous "first match wins" behavior.
        tags_id_by_name = {}
        for t in tags_in_tag_set(TOOL_API_KEY, mid):
            tags_id_by_name.setdefault(t['tag'], t['tags_id'])
        for source in source_list:
            if mkey not in source:
                continue
            metadata_tag_name = source[mkey]
            if metadata_tag_name in ['', None]:
                continue
            # hack until we have a better match check
            if mkey == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                lookup_name = 'pub_' + metadata_tag_name
            else:
                lookup_name = metadata_tag_name
            if lookup_name not in tags_id_by_name:
                continue
            metadata_tag_id = tags_id_by_name[lookup_name]
            logger.debug('found metadata to add %s', metadata_tag_id)
            tags.append(
                MediaTag(source['media_id'],
                         tags_id=metadata_tag_id,
                         action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if tags:
        # do 50 tags in each request
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]
        _tag_media_job.map(chunks)
Пример #6
0
def update_metadata_for_sources(source_list):
    """Apply the metadata tags found on each source, using a worker pool.

    For every entry of VALID_METADATA_IDS (each a single-item dict mapping a
    source key to a tag set id), look up the tag whose name matches the value
    the source holds under that key and collect a ``MediaTag`` add for it. The
    collected tags are split into chunks of 50 and processed by
    ``_tag_media_worker`` in a process pool; the call blocks until all are done.

    :param source_list: iterable of dict-like sources; each needs ``'media_id'``
        and may carry any of the metadata keys
    """
    tags = []
    for m in VALID_METADATA_IDS:
        # list(...) makes this work on Python 3, where dict views can't be indexed
        mkey, mid = list(m.items())[0]
        # Index the tag set by tag name once, rather than re-scanning it for
        # every source. setdefault keeps the first tag when a name repeats,
        # matching the previous "first match wins" behavior.
        tags_id_by_name = {}
        for t in tags_in_tag_set(TOOL_API_KEY, mid):
            tags_id_by_name.setdefault(t['tag'], t['tags_id'])
        for source in source_list:
            if mkey not in source:
                continue
            metadata_tag_name = source[mkey]
            if metadata_tag_name in ['', None]:
                continue
            # hack until we have a better match check
            if mkey == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                lookup_name = 'pub_' + metadata_tag_name
            else:
                lookup_name = metadata_tag_name
            if lookup_name not in tags_id_by_name:
                continue
            metadata_tag_id = tags_id_by_name[lookup_name]
            logger.debug('found metadata to add %s', metadata_tag_id)
            tags.append(MediaTag(source['media_id'], tags_id=metadata_tag_id, action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if tags:
        # do 50 tags in each request; range (not xrange) runs on Python 2 and 3
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]
        pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)  # process updates in parallel with worker function
        try:
            pool.map(_tag_media_worker, chunks)  # blocks until they are all done
        finally:
            # always stop the workers, even when a chunk raised
            pool.terminate()
Пример #7
0
def source_create():
    """Create one media source from the request form and attach its metadata tags.

    Required form fields are ``name`` and ``url``; ``editor_notes``,
    ``public_notes``, ``monitored`` and the metadata fields listed in
    ``valid_metadata`` are optional. When creation does not report an error,
    each submitted metadata tag is added with ``clear_others=True``. When the
    result status is ``'new'``, a feed-scraping job is started for the source.

    :return: JSON response with the first entry of the ``mediaCreate`` result
    """
    user_mc = user_admin_mediacloud_client()
    name = request.form['name']
    url = request.form['url']
    # these are optional: None when not submitted
    editor_notes = request.form.get('editor_notes')
    public_notes = request.form.get('public_notes')
    monitored = request.form.get('monitored')
    # parse out any tag to add (ie. collections and metadata)
    tag_ids_to_add = tag_ids_from_collections_param()
    valid_metadata = [{
        'form_key': 'publicationCountry',
        'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY
    }, {
        'form_key': 'publicationState',
        'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE
    }, {
        # was misspelled 'primaryLanguageg', so the submitted language never matched
        'form_key': 'primaryLanguage',
        'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE
    }, {
        'form_key': 'countryOfFocus',
        'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS
    }, {
        'form_key': 'mediaType',
        'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE
    }]
    source_to_create = {
        'name': name,
        'url': url,
        'editor_notes': editor_notes,
        'public_notes': public_notes,
        'is_monitored': monitored,
        'tags_ids': tag_ids_to_add
    }
    # need just the first entry, since we only create one
    result = user_mc.mediaCreate([source_to_create])[0]
    if result['status'] != "error":
        # if it worked, update any metadata, because we need to remove the other tags in each set
        for metadata_item in valid_metadata:
            metadata_tag_id = request.form.get(metadata_item['form_key'])  # this is optional
            if metadata_tag_id:
                # make sure to clear any other values set in this metadata tag set
                user_mc.tagMedia(
                    tags=[
                        MediaTag(result['media_id'],
                                 tags_id=metadata_tag_id,
                                 action=TAG_ACTION_ADD)
                    ],
                    clear_others=True)
    if result['status'] == 'new':
        # if it is a really new source, kick off a scraping job to find any RSS feeds
        user_mc.feedsScrape(result['media_id'])
    return jsonify(result)
Пример #8
0
def source_update(media_id):
    """Update a media source's basic info, collection tags and metadata tags.

    Required form fields are ``name`` and ``url``; ``editor_notes``,
    ``public_notes``, ``monitored`` and the metadata fields listed in
    ``valid_metadata`` are optional. Collection tags are synced to the ids
    returned by ``tag_ids_from_collections_param()``. For each metadata field,
    an empty value removes the source's existing tags from that tag set, and a
    value not already on the source is added with ``clear_others=True``.

    :param media_id: id of the media source to update
    :return: JSON response with the ``mediaUpdate`` result only; errors from
        the later tagging calls are not reflected in it
    """
    user_mc = user_admin_mediacloud_client()
    # update the basic info
    name = request.form['name']
    url = request.form['url']
    # these are optional: None when not submitted
    editor_notes = request.form.get('editor_notes')
    public_notes = request.form.get('public_notes')
    monitored = request.form.get('monitored')
    result = user_mc.mediaUpdate(
        media_id, {
            'url': url,
            'name': name,
            'editor_notes': editor_notes,
            'is_monitored': monitored,
            'public_notes': public_notes
        })
    # now we need to update the collections separately, because they are tags on the media source
    source = user_mc.media(media_id)
    existing_collection_tag_ids = [
        t['tags_id'] for t in source['media_source_tags']
        if (t['tag_sets_id'] in VALID_COLLECTION_TAG_SETS_IDS)
    ]
    tag_ids_to_add = tag_ids_from_collections_param()
    tag_ids_to_remove = list(
        set(existing_collection_tag_ids) - set(tag_ids_to_add))
    tags_to_add = [
        MediaTag(media_id, tags_id=cid, action=TAG_ACTION_ADD)
        for cid in tag_ids_to_add if cid not in existing_collection_tag_ids
    ]
    tags_to_remove = [
        MediaTag(media_id, tags_id=cid, action=TAG_ACTION_REMOVE)
        for cid in tag_ids_to_remove
    ]
    tags = tags_to_add + tags_to_remove
    if tags:  # don't make extraneous calls
        user_mc.tagMedia(tags=tags)
    # now update the metadata too
    valid_metadata = [{
        'form_key': 'publicationCountry',
        'tag_sets_id': TAG_SETS_ID_PUBLICATION_COUNTRY
    }, {
        'form_key': 'publicationState',
        'tag_sets_id': TAG_SETS_ID_PUBLICATION_STATE
    }, {
        'form_key': 'primaryLanguage',
        'tag_sets_id': TAG_SETS_ID_PRIMARY_LANGUAGE
    }, {
        'form_key': 'countryOfFocus',
        'tag_sets_id': TAG_SETS_ID_COUNTRY_OF_FOCUS
    }, {
        'form_key': 'mediaType',
        'tag_sets_id': TAG_SETS_ID_MEDIA_TYPE
    }]
    # The source's metadata tags don't change per form field, so collect them once.
    existing_metadata_tags = [
        t for t in source['media_source_tags']
        if is_metadata_tag_set(t['tag_sets_id'])
    ]
    # Compare ids as strings: form values arrive as text. The old check tested
    # the submitted id against the tag dicts themselves, so it never matched
    # and the tag was re-sent on every update.
    existing_metadata_tag_ids = set(
        str(t['tags_id']) for t in existing_metadata_tags)
    for metadata_item in valid_metadata:
        metadata_tag_id = request.form.get(metadata_item['form_key'])  # this is optional
        # form field check
        if metadata_tag_id in [None, '', 'null', 'undefined']:
            # we want to remove it if there was one there
            for existing_tag in existing_metadata_tags:
                if metadata_item['tag_sets_id'] == existing_tag['tag_sets_id']:
                    tag = MediaTag(media_id,
                                   tags_id=existing_tag['tags_id'],
                                   action=TAG_ACTION_REMOVE)
                    user_mc.tagMedia([tag])
        elif str(metadata_tag_id) not in existing_metadata_tag_ids:
            # need to add it and clear out the other
            tag = MediaTag(media_id,
                           tags_id=metadata_tag_id,
                           action=TAG_ACTION_ADD)
            user_mc.tagMedia([tag], clear_others=True)
    # result the success of the media update call - would be better to catch errors in any of these calls...
    return jsonify(result)