def query_network():
    """Handle API request '/network'.

    Validates the request arguments, asks `db_query_network` for the edge
    table and returns it as JSON.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded string, decoded with `flask.json.loads`.
        nodes_limit : int
            Positive integer, defaults to 1000.
        edges_limit : int
            Positive integer, defaults to 12500.
        include_user_mentions : bool
            'true' or 'false' (case-insensitive), defaults to True.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        edges : dict
            canonical_url : string
            date_published : string formatted datetime
            domain : string
            from_user_id : string
            from_user_screen_name : string
            id : int
            is_mention : bool
            site_type : {'claim', 'fact_checking'}
            title : string
            to_user_id : string
            to_user_screen_name : string
            tweet_created_at : string formatted datetime
            tweet_id: string
            tweet_type: {'origin', 'retweet', 'quote', 'reply'}

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()

    # Reusable validators. They are kept as lambdas on purpose so that the
    # validation messages produced by the schema library stay the same.
    positive_int = And(Use(int), lambda i: i > 0)
    bool_flag = And(
        unicode,
        Use(lambda s: s.lower()),
        lambda s: s in ('true', 'false'),
        Use(lambda s: s == 'true'))
    network_schema = Schema({
        'ids': Use(flask.json.loads),
        Optional('nodes_limit', default=1000): positive_int,
        Optional('edges_limit', default=12500): positive_int,
        Optional('include_user_mentions', default=True): bool_flag,
    })

    raw_args = copy_req_args(request.args)
    try:
        params = network_schema.validate(raw_args)
        edges_df = db_query_network(engine, **params)
        if not len(edges_df):
            raise APINoResultError('No edge could be built!')
        payload = {
            'status': 'OK',
            'num_of_entries': len(edges_df),
            'edges': flask.json.loads(edges_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        payload = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(payload)
def query_top_spreaders():
    """Handle API request '/top-user'.

    Validates the request arguments, asks `db_query_top_spreaders` for the
    spreaders table and returns it as JSON.

    API Request Parameters
    ----------------------
        upper_day : string formatted datetime
            Must look like yyyy-mm-dd; defaults to yesterday (UTC).
        most_recent : bool
            'true' or 'false' (case-insensitive), defaults to True.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        spreaders : dict
            bot_score : float
            number_of_tweets : int
            site_type : {'claim', 'fact_checking'}
            spreading_type : {'active', 'influencial'}
            upper_day : string formatted datetime
            user_id : int
            user_raw_id : string
            user_screen_name : string

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()

    # Default upper bound: the day before the current UTC date.
    default_day = (datetime.utcnow().date() -
                   timedelta(days=1)).strftime('%Y-%m-%d')

    day_validator = And(
        Regex('^\d{4}-\d{2}-\d{2}$'),
        Use(dateutil.parser.parse),
        error='Invalid date, should be yyyy-mm-dd format')
    # Lambdas are kept so schema-generated messages stay unchanged.
    bool_flag = And(
        unicode,
        Use(lambda s: s.lower()),
        lambda s: s in ('true', 'false'),
        Use(lambda s: s == 'true'))
    spreaders_schema = Schema({
        Optional('upper_day', default=default_day): day_validator,
        Optional('most_recent', default=True): bool_flag,
    })

    raw_args = copy_req_args(request.args)
    try:
        params = spreaders_schema.validate(raw_args)
        spreaders_df = db_query_top_spreaders(engine, **params)
        if not len(spreaders_df):
            raise APINoResultError('No top spreader found!')
        payload = {
            'status': 'OK',
            'num_of_entries': len(spreaders_df),
            'spreaders': flask.json.loads(
                spreaders_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        payload = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(payload)
def query_top_articles():
    """Handle API request 'top-articles'.

    Validates the request arguments, asks `db_query_top_articles` for the
    articles table and returns it as JSON.

    API Request Parameters
    ----------------------
        upper_day : string formatted datetime
            Must look like yyyy-mm-dd; defaults to yesterday (UTC).
        most_recent : bool
            'true' or 'false' (case-insensitive), defaults to True.
        exclude_tags : string
            A Python literal (e.g. a list) describing the tags to exclude;
            defaults to an empty list. It is parsed with
            `ast.literal_eval`, so only literals are accepted.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        articles : dict
            canonical_url : string
            date_captured : string formatted datetime
            number_of_tweets : int
            site_type : {'claim', 'fact_checking'}
            title : string
            upper_day : string formatted datetime

    On failure the response only carries `status` and `error`.
    """
    # Local import keeps this block self-contained.
    import ast

    lucene.getVMEnv().attachCurrentThread()
    yesterday = datetime.utcnow().date() - timedelta(days=1)
    yesterday = yesterday.strftime('%Y-%m-%d')
    q_top_article_schema = Schema({
        Optional('upper_day', default=yesterday):
        And(Regex(r'^\d{4}-\d{2}-\d{2}$'),
            Use(dateutil.parser.parse),
            error='Invalid date, should be yyyy-mm-dd format'),
        Optional('most_recent', default=True):
        And(unicode,
            Use(lambda s: s.lower()),
            lambda s: s in ('true', 'false'),
            Use(lambda s: s == 'true')),
        # SECURITY: this value comes straight from the HTTP request. It used
        # to be parsed with the builtin `eval`, which lets a client execute
        # arbitrary code on the server. `ast.literal_eval` only accepts
        # Python literals (lists, tuples, strings, numbers, ...); anything
        # else is reported as a parameter error.
        Optional('exclude_tags', default=[]):
        And(Use(ast.literal_eval),
            error='Invalid exclude_tags input format'),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_top_article_schema.validate(q_kwargs)
        df = db_query_top_articles(engine, **q_kwargs)
        if len(df) == 0:
            raise APINoResultError('No top article found!')
        response = dict(
            status='OK',
            num_of_entries=len(df),
            articles=flask.json.loads(df.to_json(**TO_JSON_KWARGS)))
    except SchemaError as e:
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
def query_tweets():
    """Handle API '/tweets'.

    Validates the request arguments, asks `db_query_tweets` for the tweets
    table and returns it as JSON.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded string, decoded with `flask.json.loads`; must not
            be empty.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        tweets : dict
            canonical_url : string
            domain : string
            id : int
            date_published : string formatted datetime
            site_type : {'claim', 'fact_checking'}
            title : string
            tweet_created_at : string formatted datetime
            tweet_id : string

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()

    # Decode the JSON string first, then require at least one element.
    ids_validator = And(
        Use(flask.json.loads, error="Format error of `ids`"),
        lambda s: len(s) > 0,
        error='Empty of `ids`')
    tweets_schema = Schema({'ids': ids_validator})

    raw_args = copy_req_args(request.args)
    try:
        params = tweets_schema.validate(raw_args)
        tweets_df = db_query_tweets(engine, params['ids'])
        if not len(tweets_df):
            raise APINoResultError('No tweet found!')
        payload = {
            'status': 'OK',
            'num_of_entries': len(tweets_df),
            'tweets': flask.json.loads(tweets_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        payload = {'status': 'ERROR', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed'}
    return flask.jsonify(payload)
def query_timeline():
    """Handle API '/timeline'.

    Fetches the tweets for the given ids with `db_query_tweets`, then
    builds one cumulative tweet-count series per site type, resampled at
    the requested resolution.

    API Request Parameters
    ----------------------
        ids : list of int
            JSON encoded string, decoded with `flask.json.loads`; must not
            be empty.
        resolution : character in 'HDWM'
            Exactly one of 'H', 'D', 'W' or 'M' (case-insensitive),
            defaults to 'D'. Used as the pandas resample rule '1<char>'.

    API Response Keys
    -----------------
        status : string
        timeline : dict
            claim : dict
                timestamp : list of string formatted datetime
                volume : list of int
            fact_checking : dict
                timestamp : list of string formatted datetime
                volume : list of int

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()
    q_timeline_schema = Schema({
        'ids':
        And(Use(flask.json.loads, error="Format error of `ids`"),
            lambda s: len(s) > 0,
            error='Empty of `ids`'),
        # Membership is tested against a tuple, not the string 'HDWM':
        # `s in 'HDWM'` is a substring test, so '' or 'HD' would pass
        # validation and only fail later as an invalid resample rule.
        Optional('resolution', default='D'):
        And(Use(lambda s: s.upper()),
            lambda s: s in ('H', 'D', 'W', 'M')),
    })
    q_kwargs = copy_req_args(request.args)
    try:
        q_kwargs = q_timeline_schema.validate(q_kwargs)
        rule = '1' + q_kwargs.pop('resolution')
        df = db_query_tweets(engine, q_kwargs['ids'])
        if len(df) == 0:
            raise APINoResultError('No tweet found!')
        df = df.set_index('tweet_created_at')
        # One series per site type: unique tweet ids counted per period.
        df1 = df.loc[df['site_type'] == N_FACT_CHECKING]
        s1 = df1['tweet_id'].drop_duplicates()
        s1 = s1.resample(rule).count()
        df2 = df.loc[df['site_type'] == N_CLAIM]
        s2 = df2['tweet_id'].drop_duplicates()
        s2 = s2.resample(rule).count()
        # Put both series on the same time index before accumulating.
        s1, s2 = s1.align(s2, join='outer', fill_value=0)
        s1 = s1.cumsum()
        s2 = s2.cumsum()
        response = dict(
            status='OK',
            timeline=dict(
                fact_checking=dict(
                    timestamp=s1.index.strftime(
                        '%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s1.tolist()),
                claim=dict(
                    timestamp=s2.index.strftime(
                        '%Y-%m-%dT%H:%M:%SZ').tolist(),
                    volume=s2.tolist())))
    except SchemaError as e:
        response = dict(status='ERROR', error=str(e))
    except APINoResultError as e:
        response = dict(status='No result error', error=str(e))
    except Exception as e:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(e)
        response = dict(status='ERROR', error='Server error, query failed')
    return flask.jsonify(response)
def query_latest_articles():
    """Handle API request '/latest-articles'.

    Validates the request arguments, asks `db_query_latest_articles` for
    the articles table and returns it as JSON.

    API Request Parameters
    ----------------------
        past_hours : int
            Set the hours from now to past to be defined as latest hours.
            Must be a positive integer.
        domains : object
            If None, return all articles in the latest hours;
            If str, should be one of {'fact_checking', 'claim', 'fake'}:
                if 'fact_checking', return fact checking articles,
                if 'claim', return claim articles,
                if 'fake', return selected fake articles, which is a
                    subset of claim, which is selected by us.
            If array of domain, return articles belonging to these
            domains (sent as a JSON string).

    Not a request parameter: `domains_file` is read from the 'api' section
    of CONF (key 'selected_fake_domains_path') and handed to the database
    query. When `domains` is 'fake', the actual used domains are loaded
    from that file; if the file doesn't exist, `claim` type domains would
    be used.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                site_type : {'claim', 'fact_checking'}
                title : string

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()

    # Validate input of request
    hours_validator = And(
        Use(int),
        lambda x: x > 0,
        error='Invalid value of `past_hours`')
    # Either one of the known keywords, or a JSON encoded value.
    domains_validator = Or(
        lambda s: s in ('fact_checking', 'claim', 'fake'),
        Use(flask.json.loads,
            error='Not valid values nor JSON string of `domains`'))
    latest_schema = Schema({
        'past_hours': hours_validator,
        Optional('domains', default=None): domains_validator,
    })

    raw_args = copy_req_args(request.args)
    try:
        params = latest_schema.validate(raw_args)
        fake_domains_path = CONF['api'].get('selected_fake_domains_path')
        articles_df = db_query_latest_articles(
            engine, domains_file=fake_domains_path, **params)
        if not len(articles_df):
            raise APINoResultError('No articles found!')
        payload = {
            'status': 'OK',
            'num_of_entries': len(articles_df),
            'articles': flask.json.loads(
                articles_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        payload = {'status': 'Parameter error', 'error': str(err)}
    except APIParseError as err:
        payload = {'status': 'Invalide query', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed!'}
    return flask.jsonify(payload)
def query_articles():
    """Handle API request '/articles'.

    Validates the request arguments, runs the search through `searcher`,
    post-processes the hits with `db_query_filter_disabled_site` and
    `db_query_twitter_shares`, and returns them as JSON ordered by
    'number_of_tweets' (largest first).

    API Request Parameters
    ----------------------
        query : string
            Must not be empty.
        sort_by : {'relevant', 'recent'}
            Defaults to 'relevant'.
        use_lucene_syntax : bool
            'true' or 'false' (case-insensitive), defaults to True.

    API Response Keys
    -----------------
        status : string
        num_of_entries : int
        total_hits : int
        articles : dict
            keys are:
                canonical_url : string
                date_published : string formatted datetime
                domain : string
                id : int
                number_of_tweets : int
                score : float
                site_type : {'claim', 'fact_checking'}
                title : string

    On failure the response only carries `status` and `error`.
    """
    lucene.getVMEnv().attachCurrentThread()

    # Validate input of request
    sort_validator = And(unicode, lambda s: s in ('relevant', 'recent'))
    # Lambdas are kept so schema-generated messages stay unchanged.
    bool_flag = And(
        unicode,
        Use(lambda s: s.lower()),
        lambda s: s in ('true', 'false'),
        Use(lambda s: s == 'true'))
    articles_schema = Schema({
        'query': lambda s: len(s) > 0,
        Optional('sort_by', default='relevant'): sort_validator,
        Optional('use_lucene_syntax', default=True): bool_flag,
    })

    raw_args = copy_req_args(request.args)
    try:
        params = articles_schema.validate(raw_args)
        total_hits, hits_df = searcher.search(
            n1=N1,
            n2=N2,
            min_score_of_recent_sorting=MIN_SCORE,
            min_date_published=STRAMING_START_AT,
            **params)
        hits_df = db_query_filter_disabled_site(engine, hits_df)
        hits_df = db_query_twitter_shares(engine, hits_df)
        if not len(hits_df):
            raise APINoResultError('No article found!')
        # sort dataframe by 'number_of_tweets'
        hits_df = hits_df.sort_values('number_of_tweets', ascending=False)
        payload = {
            'status': 'OK',
            'num_of_entries': len(hits_df),
            'total_hits': total_hits,
            'articles': flask.json.loads(hits_df.to_json(**TO_JSON_KWARGS)),
        }
    except SchemaError as err:
        payload = {'status': 'Parameter error', 'error': str(err)}
    except APIParseError as err:
        payload = {'status': 'Invalide query', 'error': str(err)}
    except APINoResultError as err:
        payload = {'status': 'No result error', 'error': str(err)}
    except Exception as err:
        # Unexpected failure: log the traceback, hide details from the client.
        logger.exception(err)
        payload = {'status': 'ERROR', 'error': 'Server error, query failed!'}
    return flask.jsonify(payload)