Пример #1
0
def update_metadata_for_sources(source_list):
    """Build "add tag" actions from the sources' metadata and submit them.

    For every entry of ``VALID_METADATA_IDS`` the tags of the referenced tag
    set are fetched once. Each source holding a non-empty value under the
    entry's key is then matched against those tags by name, and the first hit
    becomes a ``MediaTag`` add-action. The collected actions are handed to
    ``_tag_media_job.map`` in slices of 50.

    :param source_list: sources to inspect; each one is read by metadata key
        and by ``'media_id'``
    """
    batch_size = 50  # do 50 tags in each request
    pending = []
    for metadata_def in VALID_METADATA_IDS:
        # only the first key and the first value of each entry are used
        tag_sets_id = list(metadata_def.values())[0]
        source_key = list(metadata_def.keys())[0]
        candidates = tags_in_tag_set(TOOL_API_KEY, tag_sets_id)
        # hack until we have a better match check: country tags follow the
        # template pub_###
        is_pub_country = source_key == METADATA_PUB_COUNTRY_NAME
        for source in source_list:
            if source_key not in source:
                continue
            wanted = source[source_key]
            if wanted in ['', None]:
                continue
            hits = [
                candidate for candidate in candidates
                if candidate['tag'] == ('pub_' + wanted if is_pub_country else wanted)
            ]
            if not hits:
                continue
            tag_id = hits[0]['tags_id']
            logger.debug('found metadata to add %s', tag_id)
            pending.append(
                MediaTag(source['media_id'], tags_id=tag_id, action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if pending:
        batches = [
            pending[start:start + batch_size]
            for start in range(0, len(pending), batch_size)
        ]
        _tag_media_job.map(batches)
Пример #2
0
def update_metadata_for_sources(source_list):
    """Tag each source with the tags matching its metadata values.

    For every entry of ``VALID_METADATA_IDS`` the tags of the referenced tag
    set are fetched, and each source with a non-empty value under the entry's
    key is matched against them by tag name. The first match is queued as a
    ``MediaTag`` add-action. Queued actions are sent in chunks of 50 through
    ``_tag_media_worker``, by default via a process pool.

    :param source_list: sources to inspect; each one is read by metadata key
        and by ``'media_id'``
    """
    tags = []
    for m in VALID_METADATA_IDS:
        # dict views cannot be indexed on Python 3, so materialize them first
        mid = list(m.values())[0]
        mkey = list(m.keys())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, mid)
        for source in source_list:
            if mkey in source:
                metadata_tag_name = source[mkey]
                if metadata_tag_name not in ['', None]:
                    # hack until we have a better match check
                    if mkey == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                        matching = [t for t in tag_codes if t['tag'] == 'pub_' + metadata_tag_name]
                    else:
                        matching = [t for t in tag_codes if t['tag'] == metadata_tag_name]

                    if matching:
                        metadata_tag_id = matching[0]['tags_id']
                        logger.debug('found metadata to add %s', metadata_tag_id)
                        tags.append(MediaTag(source['media_id'], tags_id=metadata_tag_id, action=TAG_ACTION_ADD))
    # now do all the tags in parallel batches so it happens quickly
    if tags:
        # range() instead of xrange() so this runs on Python 3 as well
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]  # do 50 tags in each request
        use_pool = True  # flip to False to run the chunks one after another
        if use_pool:
            pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)  # process updates in parallel with worker function
            try:
                pool.map(_tag_media_worker, chunks)  # blocks until they are all done
            finally:
                # always release the worker processes, even when a chunk fails
                pool.terminate()
        else:
            for job in chunks:
                _tag_media_worker(job)
Пример #3
0
def media_type_story_counts(topics_id):
    """Return JSON with one story-count entry per media-type tag of a topic.

    Each entry carries the tag's label and id, the number of topic stories
    matching ``tags_id_media:<tags_id>``, and that number as a fraction of the
    topic's total story count (0 when the total is not positive).

    :param topics_id: id of the topic to count stories in
    :return: ``jsonify`` response of the form ``{'story_counts': [...]}``
    """
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # the overall count is the denominator for every percentage below
    overall = topic_story_count(user_mediacloud_key(), topics_id)['count']

    def _entry_for(tag):
        # count the stories whose media carries this tag
        media_query = "tags_id_media:{}".format(tag['tags_id'])
        matched = topic_story_count(
            user_mediacloud_key(), topics_id, q=media_query)['count']
        return {
            'label': tag['label'],
            'tags_id': tag['tags_id'],
            'count': matched,
            # protect against div by zero
            'pct': float(matched) / float(overall) if overall > 0 else 0,
        }

    return jsonify({'story_counts': [_entry_for(tag) for tag in media_type_tags]})
def update_metadata_for_sources(source_list):
    """Tag each source with the tags matching its metadata values.

    For every entry of ``VALID_METADATA_IDS`` the tags of the referenced tag
    set are fetched, and each source with a non-empty value under the entry's
    key is matched against them by tag name. The first match is queued as a
    ``MediaTag`` add-action. Queued actions are sent in chunks of 50 through
    ``_tag_media_worker``, by default one chunk after another.

    :param source_list: sources to inspect; each one is read by metadata key
        and by ``'media_id'``
    """
    tags = []
    for m in VALID_METADATA_IDS:
        # only the first key and the first value of each entry are used
        mid = list(m.values())[0]
        mkey = list(m.keys())[0]
        tag_codes = tags_in_tag_set(TOOL_API_KEY, mid)
        for source in source_list:
            if mkey in source:
                metadata_tag_name = source[mkey]
                if metadata_tag_name not in ['', None]:
                    # hack until we have a better match check
                    if mkey == METADATA_PUB_COUNTRY_NAME:  # template pub_###
                        matching = [t for t in tag_codes if t['tag'] == 'pub_' + metadata_tag_name]
                    else:
                        matching = [t for t in tag_codes if t['tag'] == metadata_tag_name]

                    if matching:
                        metadata_tag_id = matching[0]['tags_id']
                        logger.debug('found metadata to add %s', metadata_tag_id)
                        tags.append(MediaTag(source['media_id'], tags_id=metadata_tag_id, action=TAG_ACTION_ADD))
    # now send all the tags in batches
    if tags:
        chunks = [tags[x:x + 50] for x in range(0, len(tags), 50)]  # do 50 tags in each request
        use_pool = False  # flip to True to process the chunks in a worker pool
        if use_pool:
            pool = Pool(processes=MEDIA_METADATA_UPDATE_POOL_SIZE)  # process updates in parallel with worker function
            try:
                pool.map(_tag_media_worker, chunks)  # blocks until they are all done
            finally:
                # always release the worker processes, even when a chunk fails
                pool.terminate()
        else:
            # plain loop: the worker is called for its side effect only
            for job in chunks:
                _tag_media_worker(job)
Пример #5
0
def media_type_coverage(topics_id):
    """Return JSON comparing media-type-tagged stories to all topic stories.

    A single query of the form ``tags_id_media:(<id> <id> ...)`` covering
    every tag returned for ``TAG_SETS_ID_MEDIA_TYPE`` is counted against the
    topic, alongside the topic's unfiltered story count.

    :param topics_id: id of the topic to count stories in
    :return: ``jsonify`` response ``{'counts': {'count': ..., 'total': ...}}``
    """
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # unfiltered story count for the topic
    total = topic_story_count(user_mediacloud_key(), topics_id)['count']
    # one clause listing every media-type tag id, separated by spaces
    id_strings = [str(tag['tags_id']) for tag in media_type_tags]
    media_query = "tags_id_media:({})".format(" ".join(id_strings))
    covered = topic_story_count(user_mediacloud_key(), topics_id, q=media_query)['count']
    return jsonify({'counts': {'count': covered, 'total': total}})
Пример #6
0
def media_type_coverage(topics_id):
    """Report how many of a topic's stories come from media-type-tagged media.

    Counts the topic's stories twice: once without a filter and once with a
    ``tags_id_media:(...)`` clause that lists the id of every tag returned for
    ``TAG_SETS_ID_MEDIA_TYPE``.

    :param topics_id: id of the topic to count stories in
    :return: ``jsonify`` response ``{'counts': {'count': ..., 'total': ...}}``
    """
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)

    # grab the total stories
    unfiltered = topic_story_count(user_mediacloud_key(), topics_id)
    total_stories = unfiltered['count']

    # count the stories in any media tagged with a media type
    joined_ids = " ".join(map(str, (tag['tags_id'] for tag in media_type_tags)))
    filtered = topic_story_count(
        user_mediacloud_key(),
        topics_id,
        q="tags_id_media:({})".format(joined_ids),
    )
    tagged_stories = filtered['count']

    counts = {'count': tagged_stories, 'total': total_stories}
    return jsonify({'counts': counts})
Пример #7
0
def media_type_story_counts(topics_id):
    """Return JSON with one story-count entry per media-type tag of a topic.

    Each entry carries the tag's label and id, the number of topic stories
    matching ``tags_id_media:<tags_id>``, and that number as a fraction of the
    topic's total story count.

    :param topics_id: id of the topic to count stories in
    :return: ``jsonify`` response of the form ``{'story_counts': [...]}``
    """
    tag_story_counts = []
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    # grab the total stories
    total_stories = topic_story_count(user_mediacloud_key(), topics_id)['count']
    # make a count for each tag based on media_id
    for tag in media_type_tags:
        query_clause = "tags_id_media:{}".format(tag['tags_id'])
        tagged_story_count = topic_story_count(user_mediacloud_key(), topics_id, q=query_clause)['count']
        tag_story_counts.append({
            'label': tag['label'],
            'tags_id': tag['tags_id'],
            'count': tagged_story_count,
            # a topic without stories would otherwise raise ZeroDivisionError
            'pct': float(tagged_story_count) / float(total_stories) if total_stories > 0 else 0,
        })

    return jsonify({'story_counts': tag_story_counts})
Пример #8
0
def create_media_type_focal_set(topics_id):
    """Create a focal set definition with one focus per media-type tag.

    The set's name and description are read from the ``focalSetName`` and
    ``focalSetDescription`` form fields of the current request. After the set
    definition is created, a boolean-query focus definition
    (``tags_id_media:<tags_id>``) is created in it for every tag returned for
    ``TAG_SETS_ID_MEDIA_TYPE``.

    :param topics_id: id of the topic the focal set belongs to
    :return: ``json_error_response(...)`` when the created set carries no
        ``focal_set_definitions_id``, otherwise ``{'success': True}``
    """
    user_mc = user_mediacloud_client()
    # grab the focalSetName and focalSetDescription and then make one
    focal_set_name = request.form['focalSetName']
    focal_set_description = request.form['focalSetDescription']
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    focal_technique = FOCAL_TECHNIQUE_BOOLEAN_QUERY
    new_focal_set = user_mc.topicFocalSetDefinitionCreate(topics_id, focal_set_name, focal_set_description, focal_technique)
    if 'focal_set_definitions_id' not in new_focal_set:
        return json_error_response('Unable to create the subtopic set')
    # the id is the same for every focus, so look it up once
    focal_set_definitions_id = new_focal_set['focal_set_definitions_id']
    # now make the foci in it - one for each media type
    for tag in media_type_tags:
        params = {
            'name': tag['label'],
            'description': "Stories from {} sources".format(tag['label']),
            'query': "tags_id_media:{}".format(tag['tags_id']),
            'focal_set_definitions_id': focal_set_definitions_id,
        }
        # the per-focus result is not inspected; only the call itself matters
        user_mc.topicFocusDefinitionCreate(topics_id, **params)
    return {'success': True}
Пример #9
0
def create_media_type_focal_set(topics_id):
    """Create a media-type focal set for a topic, one focus per tag.

    Reads ``focalSetName`` and ``focalSetDescription`` from the request form,
    creates a focal set definition using the boolean-query technique, and
    then creates one focus definition in it for every tag returned for
    ``TAG_SETS_ID_MEDIA_TYPE``.

    :param topics_id: id of the topic the focal set belongs to
    :return: ``json_error_response(...)`` when the created set carries no
        ``focal_set_definitions_id``, otherwise ``{'success': True}``
    """
    client = user_mediacloud_client()
    # the set's name and description come straight from the submitted form
    set_name = request.form['focalSetName']
    set_description = request.form['focalSetDescription']
    media_type_tags = tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)
    created_set = client.topicFocalSetDefinitionCreate(
        topics_id, set_name, set_description, FOCAL_TECHNIQUE_BOOLEAN_QUERY)
    if 'focal_set_definitions_id' not in created_set:
        return json_error_response('Unable to create the subtopic set')

    # one focus per media type; the results are gathered but not returned
    created_foci = []
    for tag in media_type_tags:
        created_foci.append(
            client.topicFocusDefinitionCreate(
                topics_id,
                name=tag['label'],
                description="Stories from {} sources".format(tag['label']),
                query="tags_id_media:{}".format(tag['tags_id']),
                focal_set_definitions_id=created_set['focal_set_definitions_id'],
            ))
    return {'success': True}
Пример #10
0
def _cached_media_tags(tag_sets_id):
    """Fetch a tag set's tags and attach a media query string to each one.

    Every tag is modified in place: a ``'query'`` entry of the form
    ``tags_id_media:<tags_id>`` is added before the collection is returned.

    :param tag_sets_id: id of the tag set whose tags are fetched
    :return: the tags as returned by ``tags_in_tag_set``, each with ``'query'``
    """
    media_tags = tags_in_tag_set(TOOL_API_KEY, tag_sets_id)
    query_template = "tags_id_media:{}"
    for media_tag in media_tags:
        media_tag['query'] = query_template.format(media_tag['tags_id'])
    return media_tags
Пример #11
0
def get_media_types():
    """Return the tags fetched for ``TAG_SETS_ID_MEDIA_TYPE`` as JSON.

    :return: ``jsonify`` response of the form ``{'list': [...]}``
    """
    return jsonify({'list': tags_in_tag_set(TOOL_API_KEY, TAG_SETS_ID_MEDIA_TYPE)})