Example #1
def api_explorer_story_split_count_csv():
    filename = 'stories-over-time'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
        filename = filename  # don't have this info + current_query['q']
        SAMPLE_SEARCHES = load_sample_searches()
        queries = SAMPLE_SEARCHES[data['searchId']]['queries']
    else:
        queries = json.loads(data['queries'])
    label = " ".join([q['label'] for q in queries])
    filename = file_name_for_download(label, filename)
    # now compute total attention for all results
    story_count_results = []
    for q in queries:
        solr_q, solr_fq = parse_query_with_keywords(q)
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*', media_ids=q['sources'],
                                                     tags_ids=q['collections'])
        story_counts = apicache.normalized_and_story_count(solr_q, solr_fq, solr_open_query)
        story_count_results.append({
            'date': q['startDate'],
            'query': q['label'],
            'matching_stories': story_counts['total'],
            'total_stories': story_counts['normalized_total'],
            'ratio': float(story_counts['total']) / float(story_counts['normalized_total'])
        })
    props = ['date', 'query', 'matching_stories', 'total_stories', 'ratio']
    return csv.stream_response(story_count_results, props, filename)
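Every endpoint in this listing repeats the same dispatch: use a saved sample search when a searchId/search_id is present, otherwise parse the query the client submitted. A minimal sketch of that shared pattern for the form-based endpoints, assuming parse_as_sample and parse_query_with_keywords behave as shown above; the helper name _solr_query_for_request is hypothetical:

import json

# Sketch only, not part of the source: factor out the repeated
# "sample search vs. ad-hoc query" branch from the handlers above.
def _solr_query_for_request(data):
    if 'searchId' in data:
        # saved sample search: resolve it by id and tab index
        return parse_as_sample(data['searchId'], data['index'])
    # ad-hoc query: parse the JSON-encoded query object the client sent
    return parse_query_with_keywords(json.loads(data['q']))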
Example #2
def _get_word_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    sample_size = int(request.args['sampleSize']) if 'sampleSize' in request.args else WORD_COUNT_SAMPLE_SIZE
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"results": word_data, "sample_size": str(sample_size)})
Example #3
def api_explorer_demo_geotag_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    data['results'] = _filter_for_countries(data['results'])
    return jsonify(data)
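_filter_for_countries is never shown in this listing; a plausible sketch, inferred only from the country-level CSV columns in Example #12 ('alpha3', 'iso-a2', 'geonamesId'):

# A guess at the helper's behavior, not the project's actual code:
# keep only geo tags that resolved to a country (i.e. carry an ISO alpha-3 code).
def _filter_for_countries(results):
    return [tag for tag in results if tag.get('alpha3')]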
Example #4
def demo_top_tags_with_coverage(tag_sets_id):
    # parses the query for you
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    query_index = int(request.args['index']) if 'index' in request.args else None
    if (query_index is None) and (search_id not in [None, -1]):
        solr_q, solr_fq = parse_as_sample(search_id, request.args)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    return apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id)
Example #5
def demo_top_tags_with_coverage(tag_sets_id):
    # parses the query for you
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    query_index = int(request.args['index']) if 'index' in request.args else None
    if (query_index is None) and (search_id not in [None, -1]):
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
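        # NOTE: current_search is assigned but never used; the same dead assignment appears in Examples #10, #16 and #19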
        solr_q, solr_fq = parse_as_sample(search_id, request.args)
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    return apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id)
Example #6
def explorer_wordcount_csv():
    data = request.form
    ngram_size = data['ngramSize'] if 'ngramSize' in data else 1  # default to words if ngram size is not specified
    filename = u'sampled-ngrams-{}'.format(ngram_size)
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    return stream_wordcount_csv(filename, solr_q, solr_fq, ngram_size)
Example #7
def explorer_wordcount_csv():
    data = request.form
    ngram_size = data['ngramSize'] if 'ngramSize' in data else 1    # default to words if ngram size is not specified
    sample_size = data['sample_size'] if 'sample_size' in data else WORD_COUNT_SAMPLE_SIZE
    filename = 'sampled-{}-ngrams-{}'.format(sample_size, ngram_size)
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    return stream_wordcount_csv(filename, solr_q, solr_fq, ngram_size, sample_size)
Example #8
def explorer_entities_csv(tag_sets_id):
    tag_set = apicache.tag_set(tag_sets_id)
    filename = u'sampled-{}'.format(tag_set['label'])
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    top_tag_counts = apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id, TAG_COUNT_DOWNLOAD_LENGTH)['results']
    return csv.stream_response(top_tag_counts, ENTITY_DOWNLOAD_COLUMNS, filename)
Example #9
def explorer_entities_csv(tag_sets_id):
    tag_set = apicache.tag_set(tag_sets_id)
    filename = 'sampled-{}'.format(tag_set['label'])
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    top_tag_counts = apicache.top_tags_with_coverage(solr_q, solr_fq, tag_sets_id, TAG_COUNT_DOWNLOAD_LENGTH)['results']
    return csv.stream_response(top_tag_counts, ENTITY_DOWNLOAD_COLUMNS, filename)
Example #10
def api_explorer_demo_story_sample():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    if search_id not in [None, -1]:
        sample_searches = load_sample_searches()
        current_search = sample_searches[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)

    story_sample_result = apicache.random_story_list(solr_q, solr_fq, SAMPLE_STORY_COUNT)
    for story in story_sample_result:
        story["media"] = server.views.apicache.media(story["media_id"])
    return jsonify({"results": story_sample_result})
Example #11
def _get_word_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    sample_size = int(request.args['sampleSize']) if 'sampleSize' in request.args else WORD_COUNT_SAMPLE_SIZE
    if search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    word_data = query_wordcount(solr_q, solr_fq, sample_size=sample_size)
    # return combined data
    return jsonify({"results": word_data, "sample_size": str(sample_size)})
Example #12
def explorer_geo_csv():
    filename = u'sampled-geographic-coverage'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    data = apicache.top_tags_with_coverage(solr_q, solr_fq, tags.GEO_TAG_SET)
    data['results'] = _filter_for_countries(data['results'])
    props = ['label', 'count', 'pct', 'alpha3', 'iso-a2', 'geonamesId', 'tags_id', 'tag']
    return csv.stream_response(data['results'], props, filename)
Example #13
def explorer_stories_csv():
    filename = 'sampled-stories'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['uid'])
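        # note: unlike Example #14 below, this version keys the sample search by 'uid' rather than 'index'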
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (ie. not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
        # now page through all the stories and download them
        return _stream_story_list_csv(filename, solr_q, solr_fq)
Example #14
def explorer_stories_csv():
    filename = u'sampled-stories'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
        filename = filename  # don't have this info + current_query['q']
        # for demo users we only download 100 random stories (ie. not all matching stories)
        return _stream_story_list_csv(filename, solr_q, solr_fq, 100, MediaCloud.SORT_RANDOM, 1)
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
        # now page through all the stories and download them
        return _stream_story_list_csv(filename, solr_q, solr_fq)
Example #15
def api_explorer_demo_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None

    # parse dates up front so start_date/end_date are defined on both branches
    start_date, end_date = parse_query_dates(request.args)
    if isinstance(search_id, int) and search_id not in [None, -1]:
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    # why is this call fundamentally different than the cache call???
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=[],
                                                 tags_ids=DEFAULT_COLLECTION_IDS)
    results = apicache.normalized_and_story_split_count(solr_q, solr_open_query, start_date, end_date)

    return jsonify({'results': results})
Example #16
def api_explorer_demo_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None

    if isinstance(search_id, int) and search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)
    # why is this call fundamentally different than the cache call???
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=[],
                                                 tags_ids=DEFAULT_COLLECTION_IDS)
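    # note: this call passes solr_fq but no dates; Examples #15 and #17 pass start/end dates instead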
    results = apicache.normalized_and_story_split_count(solr_q, solr_fq, solr_open_query)

    return jsonify({'results': results})
Example #17
def api_explorer_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    start_date, end_date = parse_query_dates(request.args)
    if only_queries_reddit(request.args):
        results = pushshift.reddit_submission_normalized_and_split_story_count(query=request.args['q'],
                                                                               start_date=start_date, end_date=end_date,
                                                                               subreddits=pushshift.NEWS_SUBREDDITS)
    else:
        # get specific stories by keyword
        if isinstance(search_id, int) and search_id not in [None, -1]:
            solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
        else:
            solr_q, solr_fq = parse_query_with_keywords(request.args)
        # get all the stories (no keyword) so we can support normalization
        solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                     media_ids=request.args['sources'],
                                                     tags_ids=request.args['collections'])
        results = apicache.normalized_and_story_split_count(solr_q, solr_open_query, start_date, end_date)
    return jsonify({'results': results})
Example #18
def api_explorer_story_split_count_csv():
    filename = u'stories-over-time'
    data = request.form
    if 'searchId' in data:
        solr_q, solr_fq = parse_as_sample(data['searchId'], data['index'])
        filename = filename  # don't have this info + current_query['q']
        # TODO: build solr_open_query for sample searches; query_object below
        # is only defined on the else branch
    else:
        query_object = json.loads(data['q'])
        solr_q, solr_fq = parse_query_with_keywords(query_object)
        filename = file_name_for_download(query_object['label'], filename)
    solr_open_query = concatenate_query_for_solr(
        solr_seed_query='*',
        media_ids=query_object['sources'],
        tags_ids=query_object['collections'])
    results = apicache.normalized_and_story_split_count(
        solr_q, solr_fq, solr_open_query)
    props = ['date', 'count', 'total_count', 'ratio']
    return csv.stream_response(results['counts'], props, filename)
Example #19
def api_explorer_story_split_count():
    search_id = int(request.args['search_id']) if 'search_id' in request.args else None
    index = int(request.args['index']) if 'index' in request.args else None  # parsed but not used below

    # get specific stories by keyword
    if isinstance(search_id, int) and search_id not in [None, -1]:
        SAMPLE_SEARCHES = load_sample_searches()
        current_search = SAMPLE_SEARCHES[search_id]['queries']
        solr_q, solr_fq = parse_as_sample(search_id, request.args['index'])
    else:
        solr_q, solr_fq = parse_query_with_keywords(request.args)

    # get all the stories (no keyword)
    solr_open_query = concatenate_query_for_solr(solr_seed_query='*',
                                                 media_ids=request.args['sources'],
                                                 tags_ids=request.args['collections'])
    results = apicache.normalized_and_story_split_count(solr_q, solr_fq, solr_open_query)

    return jsonify({'results': results})
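concatenate_query_for_solr is also not shown in this listing. A rough sketch of what it appears to do in the calls above (combine an open seed query with media and collection clauses); the Solr field names are assumptions:

# Sketch under assumptions: the real implementation and field names may differ.
def concatenate_query_for_solr(solr_seed_query, media_ids, tags_ids):
    clauses = [solr_seed_query]
    if media_ids:
        clauses.append('media_id:({})'.format(' '.join(str(m) for m in media_ids)))
    if tags_ids:
        clauses.append('tags_id_media:({})'.format(' '.join(str(t) for t in tags_ids)))
    return ' AND '.join('({})'.format(c) for c in clauses)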