def get_v1_params():
    """Build the request parser for the v1 recommendation endpoint.

    Declares source/target wikis, a bounded result count, an optional
    pipe-separated seed list, a pageview toggle, the search backend,
    the ranking method, and an optional campaign tag.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('source', type=str, required=True)
    parser.add_argument('target', type=str, required=True)
    # Count is clamped to the configured maximum and defaults from config.
    parser.add_argument(
        'count',
        type=inputs.int_range(
            low=0,
            high=configuration.get_config_int('api', 'count_max')),
        required=False,
        default=configuration.get_config_int('api', 'count_default'))
    # Seed accepts an empty value or one-or-more pipe-separated titles.
    parser.add_argument('seed',
                        type=inputs.regex(r'^([^|]+(\|[^|]+)*)?$'),
                        required=False)
    parser.add_argument('include_pageviews',
                        type=inputs.boolean,
                        required=False,
                        default=True)
    parser.add_argument(
        'search',
        type=str,
        required=False,
        default='morelike',
        choices=['morelike', 'wiki', 'related_articles', 'category'])
    parser.add_argument('rank_method',
                        type=str,
                        required=False,
                        default='default',
                        choices=['default', 'sitelinks'])
    parser.add_argument('campaign', type=str, required=False, default='')
    return parser
def get_legacy_params():
    """Build the request parser for the legacy endpoint.

    Short legacy names (s, t, n, article, pageviews) are mapped via
    ``dest`` onto the same argument names the v1 parser produces, so
    downstream code can handle either parser's output uniformly.
    """
    parser = reqparse.RequestParser()
    parser.add_argument('s', type=str, dest='source', required=True)
    parser.add_argument('t', type=str, dest='target', required=True)
    # 'n' is the legacy spelling of 'count', bounded by the configured max.
    parser.add_argument(
        'n',
        type=inputs.int_range(
            low=0,
            high=configuration.get_config_int('api', 'count_max')),
        dest='count',
        required=False,
        default=configuration.get_config_int('api', 'count_default'))
    # Empty value or one-or-more pipe-separated seed titles.
    parser.add_argument('article',
                        type=inputs.regex(r'^([^|]+(\|[^|]+)*)?$'),
                        dest='seed',
                        required=False)
    parser.add_argument('pageviews',
                        type=inputs.boolean,
                        dest='include_pageviews',
                        required=False,
                        default=True)
    # Legacy endpoint supports fewer search backends than v1.
    parser.add_argument('search',
                        type=str,
                        required=False,
                        default='morelike',
                        choices=['morelike', 'wiki'])
    return parser
def get_pageview_query_url(source, title):
    """Format the configured single-article pageview query URL.

    The time window is expressed as day offsets in configuration and
    converted to concrete timestamps via get_relative_timestamp.
    """
    section = 'single_article_pageviews'
    start = get_relative_timestamp(
        configuration.get_config_int(section, 'start_days'))
    end = get_relative_timestamp(
        configuration.get_config_int(section, 'end_days'))
    template = configuration.get_config_value(section, 'query')
    return template.format(source=source, title=title, start=start, end=end)
def get_most_popular_articles(source, campaign=''):
    """Fetch the most-viewed articles for a wiki from the pageview API.

    Returns a list of ``{'title': ..., 'pageviews': ...}`` dicts, or an
    empty list when the query fails or the response shape is unexpected.
    ``campaign`` is accepted for interface compatibility but not used here.
    """
    days = configuration.get_config_int('popular_pageviews', 'days')
    date_format = configuration.get_config_value('popular_pageviews',
                                                 'date_format')
    url_template = configuration.get_config_value('popular_pageviews', 'query')
    # Query the day `days` ago, formatted as the API expects.
    date = (datetime.datetime.utcnow() -
            datetime.timedelta(days=days)).strftime(date_format)
    url = url_template.format(source=source, date=date)
    try:
        data = get(url)
    except ValueError:
        log.info('pageview query failed')
        return []
    # Guard against responses that don't match the expected structure.
    if 'items' not in data or len(
            data['items']) < 1 or 'articles' not in data['items'][0]:
        log.info('pageview data is not in a known format')
        return []
    return [{'title': entry['article'], 'pageviews': entry['views']}
            for entry in data['items'][0]['articles']]
def get_v1_articles_params():
    """Build the request parser for the v1 articles endpoint.

    Unlike the general v1 parser, the seed here is mandatory and must
    contain at least one title (pipe-separated for multiple titles).
    """
    parser = reqparse.RequestParser()
    parser.add_argument('source', type=str, required=True)
    # Count is clamped to the configured maximum and defaults from config.
    parser.add_argument(
        'count',
        type=inputs.int_range(
            low=0,
            high=configuration.get_config_int('api', 'count_max')),
        required=False,
        default=configuration.get_config_int('api', 'count_default'))
    # Required: one or more pipe-separated seed titles (empty not allowed).
    parser.add_argument('seed',
                        type=inputs.regex(r'^[^|]+(\|[^|]+)*$'),
                        required=True)
    return parser
def chunk_query_for_parameter(params, parameter, values):
    """Run `query` once per chunk of `values` and merge the results.

    The base `params` are copied for every chunk and the named
    `parameter` is set to a pipe-joined slice of `values`, sized by the
    configured wikidata chunk size. Ex:
        chunk_query_for_parameter(
            {'foo': 'bar'},
            'additional',
            ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j'])
    results in the following queries if chunk_size is 3:
        query({'foo': 'bar', 'additional': 'a|b|c'})
        query({'foo': 'bar', 'additional': 'd|e|f'})
        query({'foo': 'bar', 'additional': 'g|h|i'})
        query({'foo': 'bar', 'additional': 'j'})
    The per-chunk results are concatenated in order and returned.
    """
    chunk_size = configuration.get_config_int('external_api_parameters',
                                              'wikidata_chunk_size')
    values = list(values)  # allow arbitrary iterables, not just sequences
    param_groups = []
    for start in range(0, len(values), chunk_size):
        chunk_params = dict(params)
        chunk_params[parameter] = '|'.join(values[start:start + chunk_size])
        param_groups.append(chunk_params)
    if not param_groups:
        return []
    # One worker per chunk; queries are I/O-bound external calls.
    with multiprocessing.Pool(processes=len(param_groups)) as pool:
        results = pool.map(query, param_groups)
    return list(itertools.chain.from_iterable(results))
def query_pageviews(self, s):
    """
    Query pageview API and parse results

    Builds the popular-pageviews URL for wiki ``s`` from configuration
    (using the day `days` ago as the query date), fetches it, and
    returns a list of ``(article_title, view_count)`` tuples. Returns
    an empty list when the fetch fails; returns whatever was parsed so
    far when the response shape is unexpected.
    """
    days = configuration.get_config_int('popular_pageviews', 'days')
    date_format = configuration.get_config_value('popular_pageviews',
                                                 'date_format')
    query = configuration.get_config_value('popular_pageviews', 'query')
    date = (datetime.datetime.utcnow() -
            datetime.timedelta(days=days)).strftime(date_format)
    query = query.format(source=s, date=date)
    try:
        data = data_fetcher.get(query)
    except ValueError:
        return []
    article_pv_tuples = []
    # Narrow exception handling: only the shape errors a malformed
    # response can raise. The original bare `except:` also swallowed
    # KeyboardInterrupt/SystemExit, which must propagate.
    try:
        for d in data['items'][0]['articles']:
            article_pv_tuples.append((d['article'], d['views']))
    except (KeyError, IndexError, TypeError):
        log.info('Could not get most popular articles for %s from pageview API. Try using a seed article.', s)
    return article_pv_tuples
args = get_v1_articles_params().parse_args() recs = process_request(args) if len(recs) == 0: abort_no_candidates() return recs ItemSpec = collections.namedtuple('Item', ['wikidata_id', 'score']) v1_items_params = reqparse.RequestParser() v1_items_params.add_argument('seed', type=str, required=True) v1_items_params.add_argument( 'count', type=inputs.int_range(low=0, high=configuration.get_config_int( 'api', 'count_max')), required=False, default=configuration.get_config_int('api', 'count_default')) v1_items_model = v1.model( ItemSpec.__name__, ItemSpec(wikidata_id=fields.String(description='wikidata_id', required=True), score=fields.Float(description='score', required=True))._asdict()) v1_items_doc = dict( description= 'Gets recommendations of Wikidata items that are related to a seed item', params=dict(seed='Seed Wikidata item', count='Number of recommendations to fetch'))