def validate(self, query_id, article_id):
    """
    validate -- evaluates how well an article validates a query
    :param query_id: id of the query to validate
    :param article_id: id of the article to validate with
    :return: match_percentage (relative measure of how well the article validates the query)
    """
    max_match_value = 0

    # Need to process query and article formats
    ds = DataSource()
    query_synonyms_raw = ds.get_query_synonyms(query_id)
    # e.g. [('and', 'CC', 'Random', []), ('julia', 'NN', 'Random', []),
    #       ('phuong', 'JJ', 'Random', []),
    #       ('test', 'NN', 'Random', ['trial', 'run', 'mental_test', 'test', 'tryout',
    #                                 'trial_run', 'exam', 'examination', 'mental_testing',
    #                                 'psychometric_test']),
    #       ('validator', 'NN', 'Random', [])]
    query_synonyms = {}
    for w in query_synonyms_raw:
        query_synonyms[self.normalize_keyword(w[0])] = [self.normalize_keyword(synonym)
                                                        for synonym in w[3]]

    article_keyword = json.loads(ds.get_article_keywords(article_id)[0])
    # {NN: [list of noun keywords], VB: [list of verb keywords]}
    article_keywords_flat = set()
    for pos in article_keyword:
        for item in article_keyword[pos]:
            article_keywords_flat.add(self.normalize_keyword(item[0]))

    match_value = 0

    # Find matches: an exact keyword match counts double a synonym match
    for query_word in query_synonyms:
        max_match_value += 2
        if query_word in article_keywords_flat:
            match_value += 2
        else:
            for synonym in query_synonyms[query_word]:
                if synonym in article_keywords_flat:
                    match_value += 1
                    break

    match_percentage = 0 if max_match_value == 0 else (match_value / max_match_value)
    return match_percentage
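# A minimal, self-contained sketch of the scoring rule above, with made-up
# inputs (no DataSource needed): an exact query-word hit scores 2, a synonym
# hit scores 1, and each query word adds 2 to the maximum possible score.
query_synonyms = {"test": ["trial", "exam"], "validator": []}
article_keywords_flat = {"exam", "julia"}

match_value, max_match_value = 0, 0
for query_word, synonyms in query_synonyms.items():
    max_match_value += 2
    if query_word in article_keywords_flat:
        match_value += 2
    elif any(s in article_keywords_flat for s in synonyms):
        match_value += 1

print(match_value / max_match_value)  # one synonym hit out of two words -> 0.25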
def convert_symbols_to_raw(src, dst, model, level, tolevel=0):
    """ convert a symbolic time series to lower level or raw data """
    if level == "raw":
        return
    if level == "symbol":
        level = 0
    if level == "rle":
        level = 1
    if level == "statecluster":
        level = 2
        prle = pr.RLEProcess()
        dat = ds.FileDataSource(src, src)
        dat.load()
        dat.data = prle.batch_process(dat.data)
        dat.save()
    if int(tolevel) == 0:
        ctr = cv.ToRaw()
        ctr.convert(src, dst, model, int(level))
    else:
        cts = cv.ToSymbols()
        cts.convert(src, dst, model, int(level))
        if int(tolevel) == 2:
            prle = pr.RLEProcess()
            dat = ds.FileDataSource(dst, dst)
            dat.load()
            dat.data = prle.batch_process(dat.data)
            dat.save()
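# Hypothetical usage; the file and model names are made up for illustration.
# Converts a state-cluster-level series (level 2) back to raw values.
convert_symbols_to_raw("house-statefinder.csv", "house-raw.csv",
                       "house-model.mdl", "statecluster", tolevel=0)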
def get_distance(file0, file1, rate):
    """ compute the Levenshtein distance between two symbolic time series """
    dat0 = ds.FileDataSource(file0, None)
    dat1 = ds.FileDataSource(file1, None)
    dat0.load()
    dat1.load()
    gdt = fu.Get_Distance()
    dist = gdt.levenshtein(dat0.data, dat1.data, int(rate))
    print("total-distance (d) = %d" % dist[0])
    print("total-time-length (l) = %d" % dist[1])
    print("normalized-distance (d/l) = %f" % (dist[0] * 1.0 / dist[1]))
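# Hypothetical usage; the file names are invented. The normalized distance
# d/l makes results comparable across series of different lengths.
get_distance("day1-symbol.csv", "day2-symbol.csv", rate=1)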
def find_states_spclust(inputfile, outputname, rate, dimensions, wgrid, wnbr,
                        ememory, ethreshold, mindist):
    """ batch process using spclust symbolization
    - input csv file with triples (time start, time end, values)
    - base name for output files (can include a folder)
    - sampling rate of the input time series
    - number of dimensions of the input time series
    - Spclust grid size
    - Spclust count threshold
    - StateFinder fading factor
    - StateFinder prediction error threshold
    - StateFinder min distance for clustering segments (0-1)
    """
    call(["java", "-jar", "./Spclust/SpComputeModel.jar",
          inputfile, dimensions, wgrid, wnbr, outputname + "-model.spc"])
    call(["java", "-jar", "./Spclust/SpComputeSymbols.jar",
          outputname + "-model.spc", inputfile, outputname + "-symbol.csv"])
    nbclusters = int(open(outputname + "-model.spcn", 'r').readline())
    src = ds.FileDataSource(outputname + "-symbol.csv", outputname + "-statefinder.csv")
    rel = pr.RLEProcess()
    sem = pr.SegmentSparseProcess(rate, ethreshold, ememory)
    clu = pr.ClusterSparseProcess(mindist, nbclusters)
    src.load()
    src.data = rel.batch_process(src.data)
    src.save_to(outputname + "-rle.csv")
    src.data = rel.batch_process(src.data)
    segments = sem.batch_process(src.data)
    (src.data, lookup) = clu.batch_process(segments, src.data)
    src.save()
    lookups = {0: lt.SpclustSymbolLookupTable(outputname + "-model.spc"),
               1: lt.ExpandLookupTable(rate),
               2: lt.ClusterSparseLookupTable(lookup, rate)}
    lkf = open(outputname + "-model.mdl", 'w')
    pickle.dump(lookups, lkf)
    lkf.close()
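# Hypothetical invocation; the file names and parameter values are
# illustrative, not tuned.
find_states_spclust("power.csv", "out/power", rate=60,
                    dimensions="3", wgrid="10", wnbr="5",
                    ememory=0.9, ethreshold=0.1, mindist=0.2)
# Writes out/power-symbol.csv, out/power-rle.csv,
# out/power-statefinder.csv, and out/power-model.mdl.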
def run(self):
    """
    add synonyms of new keywords into query_word table
    :return: Nothing
    """
    fd, fo = 0, 0
    try:
        if "--no-lock" not in sys.argv:
            fo = open(os.getenv("HOME") + "/.event-detection-active", "wb")
            fd = fo.fileno()
            fcntl.lockf(fd, fcntl.LOCK_EX)
        ds = DataSource()
        # Query the queries SQL table for queries that have not been processed
        unprocessed_queries = ds.get_unprocessed_queries()
        for query in unprocessed_queries:
            THRESHOLD = None
            print(query)
            query_parts = {"query": " ".join(filter(None, query[1:6])),
                           "subject": query[1],
                           "verb": query[2],
                           "direct_obj": query[3],
                           "indirect_obj": query[4],
                           "location": query[5]}
            print(query_parts)
            synonyms = Query(query[0], query_parts, THRESHOLD).get_synonyms()
            print(synonyms)
            # synonyms = {NN: {word1: [synonyms], word2: [synonyms], ...}, VB: ...}
            for pos_group in synonyms:
                print(synonyms[pos_group])
                for query_word in synonyms[pos_group]:
                    ds.insert_query_word_synonym(query[0], query_word, pos_group,
                                                 synonyms[pos_group][query_word])
            ds.post_query_processor_update(query[0])
    finally:
        if "--no-lock" not in sys.argv:
            fcntl.lockf(fd, fcntl.LOCK_UN)
            fo.close()
class Cluster:
    ds = DataSource()

    def __init__(self, id):
        """
        Creates a cluster object
        :param id: cluster id from algorithm
        :return: None
        """
        self.id = id
        self.article_ids = []
        self.article_titles = []
        self.keywords = None

    def add_article(self, article_id, article_title):
        """
        Adds an article to the cluster
        :param article_id: article id
        :param article_title: article title
        :return: None
        """
        self.article_ids.append(article_id)
        self.article_titles.append(article_title)

    def is_valid_cluster(self, num_articles):
        """
        Checks if the cluster is valid, meaning it contains more than one article
        but fewer articles than a quarter of all the articles considered
        :param num_articles: number of articles considered
        :return: True if valid cluster, else False
        """
        return num_articles / 4 > len(self.article_ids) > 1

    def get_keywords(self):
        """
        Gets the cumulative set of keywords for the cluster
        :return: set of keywords
        """
        # Don't rebuild the keyword set if it has already been built
        if self.keywords is None:
            self.keywords = set()
            keyword_dicts = [json.loads(self.ds.get_article_keywords(article)[0])
                             for article in self.article_ids]
            for kw_dict in keyword_dicts:
                for pos in kw_dict:
                    for kw in kw_dict[pos]:
                        self.keywords.add(kw[0])
        return self.keywords

    def get_article_ids(self):
        """
        Returns the article ids associated with this cluster
        :return: list of article ids
        """
        return self.article_ids
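# Hypothetical usage; the ids and titles are made up for illustration.
cluster = Cluster(7)
cluster.add_article(42, "Storm hits coast")
cluster.add_article(43, "Flooding follows storm")
if cluster.is_valid_cluster(num_articles=100):  # valid: 100 / 4 > 2 > 1
    print(cluster.get_keywords())  # queries the database for each article's keywords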
def convert_rle(src, dst):
    """ apply RLE compression """
    dat = ds.FileDataSource(src, dst)
    prle = pr.RLEProcess()
    dat.load()
    dat.data = prle.batch_process(dat.data)
    dat.save()
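# A minimal sketch of what the RLE step is assumed to do to the
# (time start, time end, value) triples used elsewhere in this module:
# consecutive rows with the same value are merged into one.
#   before: (0, 1, 'A'), (1, 2, 'A'), (2, 3, 'B')
#   after:  (0, 2, 'A'), (2, 3, 'B')
convert_rle("house-symbol.csv", "house-rle.csv")  # hypothetical file names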
def convert_median_filter(src, dst, win):
    """ apply median filtering to the time series """
    dat = ds.FileDataSource(src, dst)
    pmf = pr.MedianFilteringProcess(win)
    dat.load()
    dat.data = pmf.batch_process(dat.data)
    dat.save()
def split_file_by(filename, folder, offset=0, duration=86400):
    """ split the file for applying the forecasting algorithm """
    # duration defaults to 86400 seconds, i.e. one day per output file
    src = ds.FileDataSource(filename, None)
    cut = fu.PeriodicCutProcess(int(duration), int(offset))
    src.load()
    src.data = cut.batch_process(src.data)
    spl = fu.Splitter(src.data)
    spl.splitFiles(folder, int(duration), int(offset))
def find_states(inputfile, outputname, rate, smethod, snbr, ememory, ethreshold, mindist):
    """ batch process using standard symbolization
    - input csv file with triples (time start, time end, value)
    - base name for output files (can include a folder)
    - sampling rate of the input time series
    - symbolization method (0: uniform, 1: median, 2: distinct median)
    - number of symbols to generate
    - StateFinder fading factor
    - StateFinder prediction error threshold
    - StateFinder min distance for clustering segments (0-1)
    """
    src = ds.FileDataSource(inputfile, outputname + "-statefinder.csv")
    src.load()
    enc = sbz.UniformSymbolizer()
    if smethod == "1":
        enc = sbz.MedianSymbolizer()
    if smethod == "2":
        enc = sbz.DistinctMedianSymbolizer()
    enc.load(src.data)
    (sep, mini, maxi) = enc.get_separators(int(snbr))
    sym = pr.SymbolizeProcess(1, sep)
    rel = pr.RLEProcess()
    sem = pr.SegmentSparseProcess(rate, ethreshold, ememory)
    clu = pr.ClusterSparseProcess(mindist, int(snbr) + 1)
    src.data = sym.batch_process(src.data)
    src.save_to(outputname + "-symbol.csv")
    src.data = rel.batch_process(src.data)
    src.save_to(outputname + "-rle.csv")
    segments = sem.batch_process(src.data)
    (src.data, lookup) = clu.batch_process(segments, src.data)
    src.save()
    lookups = {0: lt.SymbolLookupTable(sep, mini, maxi),
               1: lt.ExpandLookupTable(rate),
               2: lt.ClusterSparseLookupTable(lookup, rate)}
    lkf = open(outputname + "-model.mdl", 'w')
    pickle.dump(lookups, lkf)
    lkf.close()
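# Hypothetical invocation with the median symbolizer and 8 symbols; produces
# the same set of output files as find_states_spclust above, using the
# built-in symbolizers instead of Spclust.
find_states("power.csv", "out/power", rate=60, smethod="1", snbr="8",
            ememory=0.9, ethreshold=0.1, mindist=0.2)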
def run(self):
    """
    adds keywords as a JSON string to articles in database
    :return: Nothing
    """
    fd, fo = 0, 0
    try:
        if "--no-lock" not in sys.argv:
            path = articles_path
            fo = open(os.getenv("HOME") + "/.event-detection-active", "wb")
            fd = fo.fileno()
            fcntl.lockf(fd, fcntl.LOCK_EX)
        ds = DataSource()
        unprocessed_articles = ds.get_unprocessed_articles()
        for article in unprocessed_articles:
            try:
                extractor = KeywordExtractor()
                article_id = article[0]
                article_filename = article[2]
                article_title = article[1]
                article_url = article[3]
                article_source = article[4]
                article_file = open(os.getcwd() + "/articles/{0}".format(article_filename),
                                    "r", encoding="utf8")
                body = article_file.read()
                article_file.close()
                article_with_body = Article(article_title, body, article_url, article_source)
                keywords = extractor.extract_keywords(article_with_body)
                keyword_string = json.dumps(keywords)
                ds.add_keywords_to_article(article_id, keyword_string)
                ds.add_article_to_query_articles(article_id)
            except (FileNotFoundError, IOError):
                print("Wrong file or file path", file=sys.stderr)
    finally:
        if "--no-lock" not in sys.argv:
            fcntl.lockf(fd, fcntl.LOCK_UN)
            fo.close()
class Notifier:
    """ Used to notify user of query detection """
    bitly_api_url = "https://api-ssl.bitly.com"

    def __init__(self):
        """
        Initializes notification clients.
        :return: None
        """
        self.datasource = DataSource()
        self.phone_client = TwilioRestClient(twilio_account_sid, twilio_auth_token)
        self.email_client = sendgrid.SendGridClient(sendgrid_api_key)

    def check_valid_phone(self, phone):
        # Accepts numbers beginning with +1 followed by at least nine digits
        # (re.match anchors at the start of the string only)
        if phone is None:
            return False
        return re.match(r'\+1[0-9]{9}', phone) is not None

    def check_valid_email(self, email):
        if email is None:
            return False
        return re.match(r'[^\.]+@[^\.]+\.[^\.]+', email) is not None

    def alert_phone(self, text):
        """
        Sends text message
        :param text: the text body
        :return: None
        """
        if self.check_valid_phone(self.phone):
            try:
                self.phone_client.messages.create(body=text, to=self.phone, from_=twilio_number)
            except:
                print("Twilio Error. If using a trial account, make sure the phone number "
                      "is verified with Twilio at twilio.com/user/account/phone-numbers/verified")

    def alert_email(self, text):
        """
        Sends email message
        :param text: the email body in html
        :return: None
        """
        if self.check_valid_email(self.email):
            message = sendgrid.Mail()
            message.add_to(self.email)
            message.set_from(from_email)
            message.set_subject("Event Detection")
            message.set_html(text)
            status_code, status_message = self.email_client.send(message)
            if int(status_code) != 200:
                print("Error " + str(status_code) + " : " + str(status_message))

    def on_validation(self, query_id, article_ids):
        """
        Notifies user on validation
        :param query_id: query that was validated
        :param article_ids: articles that validated query
        :return: None
        """
        query_string = " ".join(self.datasource.get_query_elements(query_id))
        article_data = []
        for article_id in article_ids:
            article_url = self.get_article_shortlink(self.datasource.get_article_url(article_id))
            article_title = self.datasource.get_article_title(article_id)
            article_data.append((article_title, article_url))
        html = self.format_html(query_string, article_data)
        texts = self.format_plaintext(query_string, article_data)
        self.phone, self.email = self.datasource.get_email_and_phone(query_id)
        self.alert_email(html)
        for text in texts:
            self.alert_phone(text)

    @staticmethod
    def format_html(query_string, article_data):
        """
        formats body of email
        :param query_string: query that was validated
        :param article_data: articles that validated query
        :return: html of email body
        """
        html = "<h1>{query}</h1><p>Articles:</p>".format(query=query_string)
        for article in article_data:
            article_title = article[0]
            article_url = article[1]
            html += "<p><a href=\"{url}\">{title}</a></p>".format(url=article_url,
                                                                  title=article_title)
        return html

    @staticmethod
    def format_plaintext(query_string, article_data):
        """
        formats body of text message
        :param query_string: query that was validated
        :param article_data: articles that validated query
        :return: list of text bodies, each kept under 1600 characters
        """
        texts = []
        text = "Event Detected!\nQuery: {query}\nArticles: ".format(query=query_string)
        for article in article_data:
            article_title = article[0]
            article_url = article[1]
            next_article = "\n{title}\nLink {url}\n".format(url=article_url,
                                                            title=article_title)
            if len(text) + len(next_article) > 1600:
                texts.append(text)
                text = "Event Detected!\nQuery: {query}\nArticles: ".format(query=query_string)
            text += next_article
        texts.append(text)
        return texts

    def get_article_shortlink(self, article_url):
        """
        Gets a shortlink from bitly for the article url
        :param article_url: the url to shorten
        :return: the shortened url if successful (otherwise just the article url)
        """
        payload = {"longUrl": article_url,
                   "login": bitly_api_login,
                   "apiKey": bitly_api_key}
        response = requests.get(self.bitly_api_url + "/v3/shorten", params=payload)
        response_json = response.json()
        # Look for data -> url -> short url in response_json;
        # if it's not there, just return the old url
        if "data" in response_json and "url" in response_json["data"]:
            return response_json["data"]["url"]
        return article_url
# coding=utf-8
import tensorflow as tf
import Utils.DataSource as ds

mnist = ds.readMnist("data/")

sess = tf.InteractiveSession()
x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])


# Weight Initialization
def weight_variable(shape):
    # Small positive noise breaks symmetry between units
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)


def bias_variable(shape):
    initial = tf.constant(0.1, shape=shape)
    return tf.Variable(initial)


# Convolution and Pooling
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')


# First Convolutional Layer
W_conv1 = weight_variable([5, 5, 1, 32])  # 5x5 filters, 1 input channel, 32 output channels
b_conv1 = bias_variable([32])
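# The snippet above stops after declaring the first layer's variables.
# A minimal sketch of how they are typically wired together, following the
# standard TensorFlow v1 MNIST tutorial pattern (not part of the original file):
x_image = tf.reshape(x, [-1, 28, 28, 1])                  # flat 784 -> 28x28x1 image
h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)  # -> 28x28x32 feature maps
h_pool1 = max_pool_2x2(h_conv1)                           # -> 14x14x32 after pooling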
class MatrixCreator:
    def __init__(self):
        """
        Initialize class variables
        :return: None
        """
        self.ds = DataSource()
        self.ids = []
        self.num_entries = 0
        self.num_articles = 0
        self.num_article_words = 0
        self.article_titles = []
        self.stopwords = set(nltk.corpus.stopwords.words('english'))
        self.lemmatizer = WordNetLemmatizer()

    def get_num_entries(self):
        """
        Gets the number of non-zero entries in the matrix
        :return: number of non-zero entries in the matrix
        """
        return self.num_entries

    def get_num_articles(self):
        """
        Gets the number of articles, which is the number of rows in the matrix
        :return: number of articles
        """
        return self.num_articles

    def get_num_article_words(self):
        """
        Gets the number of unique words across all documents
        :return: number of unique words across all documents
        """
        return self.num_article_words

    def get_article_titles(self):
        """
        Gets ordered list of article titles corresponding to article ids in self.ids
        :return: list of article titles
        """
        return self.article_titles

    def get_article_ids(self):
        """
        Gets article ids
        :return: list of article ids
        """
        return self.ids

    def retrieve_article_ids_titles_filenames(self):
        """
        Retrieves article ids, titles, and filenames from the database and sets class variables
        :return: None
        """
        articles = self.ds.get_article_ids_titles_filenames()
        self.ids = []
        self.article_titles = []
        self.filenames = []
        for article in articles:
            if os.path.isfile(articles_path + article[2]):
                self.ids.append(article[0])
                self.article_titles.append(article[1])
                self.filenames.append(article[2])
        self.num_articles = len(self.article_titles)

    def get_article_text_by_article(self):
        """
        Gets word counts by article, along with the set of words across all articles
        :return: set of words used in all articles
        """
        pattern = re.compile(r'TITLE:(.*)TEXT:(.*)', re.DOTALL)
        self.article_words_by_article = []
        all_article_words_set = set()
        for idx, filename in enumerate(self.filenames):
            article_file = open(articles_path + filename, "r", encoding="utf8")
            body = article_file.read()
            article_file.close()
            tagged_items = re.match(pattern, body)
            title_tagged = tagged_items.group(1)
            body_tagged = tagged_items.group(2)
            extractor = KeywordExtractor()
            title_text, _ = extractor.preprocess_keywords(title_tagged)
            body_text, _ = extractor.preprocess_keywords(body_tagged)
            body_text.extend(title_text)
            body_text = [Counter(sentence.strip().split()) for sentence in body_text]
            body_text_counter = Counter()
            for sentence in body_text:
                body_text_counter.update(sentence)
                all_article_words_set.update(sentence.keys())
            self.article_words_by_article.append(body_text_counter)
        self.num_article_words = len(all_article_words_set)
        return all_article_words_set

    def construct_matrix(self):
        """
        Constructs an articles-by-words numpy array and populates it with
        tf-idf values for each article-word cell.
        :return: tf-idf matrix, or None if the matrix is empty (usually because no
                 articles were found, for example when the working directory is not
                 the root directory)
        """
        # Initialize article ids and titles
        self.retrieve_article_ids_titles_filenames()
        # Get words to construct the matrix
        all_article_words_list = list(self.get_article_text_by_article())
        matrix = zeros((self.num_articles, self.num_article_words))
        num_entries = 0
        for article_word_idx, article_word in enumerate(all_article_words_list):
            for article_idx, article_id in enumerate(self.ids):
                if article_word in self.article_words_by_article[article_idx]:
                    matrix[article_idx, article_word_idx] += \
                        self.article_words_by_article[article_idx][article_word]
                    num_entries += 1
        self.num_entries = num_entries  # count of non-zero entries, used to calculate K
        # If the matrix is empty, we cannot use it
        if matrix.shape == (0, 0):
            return None
        transformer = TfidfTransformer()
        tfidf_matrix = transformer.fit_transform(matrix).toarray()
        return tfidf_matrix
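# Hypothetical usage; assumes the database and the articles_path directory
# referenced above are populated.
creator = MatrixCreator()
tfidf = creator.construct_matrix()
if tfidf is not None:
    print(creator.get_num_articles(), "articles x",
          creator.get_num_article_words(), "words,",
          creator.get_num_entries(), "non-zero entries")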
import sys
import os
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath('.'))

from flask import Flask, render_template, request, redirect
import subprocess
from Utils import subprocess_helpers
from Utils.DataSource import *

app = Flask(__name__)
dataSource = DataSource()


def launch_preprocessors():
    process = subprocess.Popen(subprocess_helpers.python_path + " Daemons/QueryProcessorDaemon.py && "
                               + subprocess_helpers.python_path + " Daemons/ArticleProcessorDaemon.py",
                               executable=subprocess_helpers.executable,
                               shell=True, universal_newlines=True)


@app.route("/", methods=["GET"])
def queries():
    # Get the list of queries from the database with counts of associated articles
    all_queries = dataSource.queries_route()
    queries_formatted = [{
        "id": q[0],
        "subject": q[1],