def tabulate_entities(query_list, date_ranges, text_words, mesh_terms, author):
    """Count search results per entity for every date range.

    For each ``(from_date, to_date)`` pair and each name in ``query_list``
    two searches are built with ``get_expression`` and run through
    ``get_search_count``: one limited only by the name and the dates, and
    one that additionally passes ``text_words`` / ``mesh_terms``.

    Args:
        query_list: Names to search for.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        author: When truthy every name is sent as ``author_name``,
            otherwise as ``journal_name``.

    Returns:
        dict with the keys

        * ``search_counts`` - DataFrame indexed by ``dates`` holding, per
          name, the number of results of the search without keywords;
        * ``keyword_search_counts`` - same layout for the keyword search;
        * ``author_articles`` - name -> accumulated results of the search
          without keywords (only filled when ``author`` is truthy);
        * ``log`` - one "query, newline, count" string per executed search.
    """
    entry_template = "{query}\n{count}"
    name_key = 'author_name' if author else 'journal_name'

    timestamps = []
    log = []
    author_articles = defaultdict(list)
    totals = defaultdict(list)
    keyword_totals = defaultdict(list)

    for from_date, to_date in date_ranges:
        timestamps.append(pd.Timestamp(from_date))

        for name in query_list:
            name_filter = {name_key: name}

            # Search restricted to the entity and the date range only.
            plain_query = get_expression(
                from_date=from_date, to_date=to_date, **name_filter)
            plain_articles = get_search_count(plain_query)
            plain_total = len(plain_articles)

            # Same search narrowed down by the keywords.
            keyword_query = get_expression(
                text_terms=text_words,
                mesh_terms=mesh_terms,
                from_date=from_date,
                to_date=to_date,
                **name_filter)
            keyword_total = len(get_search_count(keyword_query))

            log.extend([
                entry_template.format(query=plain_query, count=plain_total),
                entry_template.format(query=keyword_query, count=keyword_total),
            ])

            if author:
                author_articles[name].extend(plain_articles)

            totals[name].append(plain_total)
            keyword_totals[name].append(keyword_total)

    date_index = pd.Index(timestamps, name='dates')
    return dict(
        search_counts=pd.DataFrame(totals, index=date_index),
        keyword_search_counts=pd.DataFrame(keyword_totals, index=date_index),
        author_articles=author_articles,
        log=log)
def tabulate_keywords(date_ranges, text_words, mesh_terms):
    """Count keyword search results for every date range.

    Args:
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.

    Returns:
        dict with ``search_counts`` (DataFrame indexed by ``dates`` whose
        ``count`` column holds the number of results per range) and ``log``
        (one "query, newline, count" string per executed search).
    """
    entry_template = "{query}\n{count}"
    totals = defaultdict(list)
    timestamps = []
    log = []

    for from_date, to_date in date_ranges:
        keyword_query = get_expression(
            text_terms=text_words,
            mesh_terms=mesh_terms,
            from_date=from_date,
            to_date=to_date)
        total = len(get_search_count(keyword_query))

        log.append(entry_template.format(query=keyword_query, count=total))
        timestamps.append(pd.Timestamp(from_date))
        totals['count'].append(total)

    date_index = pd.Index(timestamps, name='dates')
    return dict(
        search_counts=pd.DataFrame(totals, index=date_index),
        log=log)
def tabulate(query_list, date_ranges, text_words, mesh_terms, search_journals):
    """Collect search counts per date range, optionally per journal/author.

    With a non-empty ``query_list`` two searches are run for every item and
    date range: one restricted to the item alone and one that also applies
    ``text_words`` / ``mesh_terms``.  With an empty ``query_list`` only the
    keyword search is run for every date range.  Every executed search is
    echoed to standard output.

    Args:
        query_list: Journal or author names; its length decides the mode.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        search_journals: When truthy items are sent as ``journal_name``,
            otherwise as ``author_name``.

    Returns:
        dict with ``search_counts`` (column label -> list of counts),
        ``queries`` (keyword searches and their counts, blank-line
        separated) and ``query_totals`` (the same for the item-only
        searches).
    """
    search_counts = {}
    keyword_log = []
    totals_log = []

    def build_expression(name, **filters):
        # Route the name to the journal or the author field of the search.
        if search_journals:
            return get_expression(journal_name=name, **filters)
        return get_expression(author_name=name, **filters)

    # O(n*y) for n=len(query_list) and y=len(date_ranges)
    if len(query_list) == 0:
        keyword_only = search_counts['Counts'] = []
        for from_date, to_date in date_ranges:
            expression = get_expression(text_terms=text_words,
                                        mesh_terms=mesh_terms,
                                        from_date=from_date,
                                        to_date=to_date)
            count = get_search_count(expression)
            keyword_only.append(count)

            # Log is printed to standard output and file
            count_text = str(count)
            print(expression)
            print(count_text + '\n')
            keyword_log.append(expression + '\n' + count_text)
    else:
        for item in query_list:
            total_label = 'Total Article Count [' + item + ']'
            keyword_label = 'Keyword Article Count [' + item + ']'
            keyword_counts = search_counts[keyword_label] = []
            total_counts = search_counts[total_label] = []

            for from_date, to_date in date_ranges:
                # Totals for the item alone (no keywords).
                item_expression = build_expression(
                    item, from_date=from_date, to_date=to_date)
                item_count = get_search_count(item_expression)
                total_counts.append(item_count)

                total_line = 'Total - ' + item_expression
                total_text = str(item_count)
                print(total_line)
                print(total_text + '\n')
                totals_log.append(total_line + '\n' + total_text)

                # Counts for the item combined with the keywords.
                expression = build_expression(
                    item,
                    text_terms=text_words,
                    mesh_terms=mesh_terms,
                    from_date=from_date,
                    to_date=to_date)
                count = get_search_count(expression)
                keyword_counts.append(count)

                # Log is printed to standard output and file
                count_text = str(count)
                print(expression)
                print(count_text + '\n')
                keyword_log.append(expression + '\n' + count_text)

    return dict(search_counts=search_counts,
                queries='\n\n'.join(keyword_log),
                query_totals='\n\n'.join(totals_log))
def _cached_articles(expression):
    """Return the articles for ``expression``, looking in the cache first.

    Rows stored in the ``articles`` table under ``expression`` are returned
    (each converted with ``str``) when any exist.  Otherwise the search is
    run through ``get_search_count``, its results are written to the table
    and then returned unchanged.
    """
    with query() as cursor:
        rows = cursor.execute(
            """SELECT article from articles where query=? """,
            (expression,)).fetchall()
    if rows:
        return [str(row[0]) for row in rows]

    # NOTE: a search without hits stores no rows, so it cannot be told apart
    # from "never searched" and is run again on every call.
    articles = get_search_count(expression)
    # NOTE(review): relies on the `query()` context manager to persist the
    # insert when the block exits - confirm against its definition.
    with query() as cursor:
        cursor.executemany(
            """INSERT INTO articles(query, article) values(?, ?)""",
            [(expression, article) for article in articles])
    return articles


def tabulate(query_list, date_ranges, text_words, mesh_terms, isAuthor):
    """Build a table of search counts, caching results in ``articles``.

    With a non-empty ``query_list`` every date range / item pair yields one
    row holding the number of results for the item alone and for the item
    combined with the keywords.  With an empty ``query_list`` every date
    range yields one row with the keyword-only count.  The first element of
    the table is always a header tuple.

    Args:
        query_list: Author or journal names; empty selects keyword-only mode.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        isAuthor: When truthy items are sent as ``author_name`` and their
            articles are collected; otherwise items are sent as
            ``journal_name``.

    Returns:
        dict with ``search_counts`` (list of tuples, header first) and
        ``author_articles`` (name -> accumulated articles of the search
        without keywords; only filled when ``isAuthor`` is truthy).
    """
    author_articles = defaultdict(list)

    if not query_list:
        sc = [('from', 'to', 'count')]
        for from_date, to_date in date_ranges:
            expression = get_expression(
                text_terms=text_words,
                mesh_terms=mesh_terms,
                from_date=from_date,
                to_date=to_date)
            # The earlier implementation referenced an undefined name when
            # storing a cache miss here; the shared helper stores the rows
            # it has just built.
            count = len(_cached_articles(expression))
            sc.append((from_date, to_date, count))
        return dict(search_counts=sc, author_articles=author_articles)

    name_key = 'author_name' if isAuthor else 'journal_name'
    sc = [('from', 'to', 'name', 'count', 'count w/ keywords')]

    # O(n*y) for n=len(query_list) and y=len(date_ranges)
    for from_date, to_date in date_ranges:
        for item in query_list:
            query_param = {name_key: item}

            # Query totals (w/o keywords)
            item_expression = get_expression(
                from_date=from_date, to_date=to_date, **query_param)
            # Same query narrowed down by the keywords.
            expression = get_expression(
                text_terms=text_words,
                mesh_terms=mesh_terms,
                from_date=from_date,
                to_date=to_date,
                **query_param)

            item_articles = _cached_articles(item_expression)
            if isAuthor:
                author_articles[item].extend(item_articles)

            # Looked up after the totals have been stored, so an identical
            # expression is served from the cache instead of being searched
            # and inserted a second time.
            keyword_count = len(_cached_articles(expression))

            sc.append((from_date, to_date, item,
                       len(item_articles), keyword_count))

    return dict(search_counts=sc, author_articles=author_articles)
def tabulate(query_list, date_ranges, text_words, mesh_terms, search_journals):
    """Collect search counts per date range, optionally per journal/author.

    With a non-empty ``query_list`` two searches are run for every item and
    date range: one restricted to the item alone and one that also applies
    ``text_words`` / ``mesh_terms``.  With an empty (or ``None``)
    ``query_list`` only the keyword search is run for every date range.
    Every executed search is echoed to standard output.

    Args:
        query_list: Journal or author names; empty or ``None`` selects the
            keyword-only mode.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.  It is
            copied into a list first, so one-shot iterables such as
            ``zip(...)`` are safe to pass.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        search_journals: When truthy items are sent as ``journal_name``,
            otherwise as ``author_name``.

    Returns:
        dict with ``search_counts`` (column label -> list of counts),
        ``queries`` (keyword searches and their counts, blank-line
        separated) and ``query_totals`` (the same for the item-only
        searches).
    """
    search_counts = {}
    queries = []
    query_totals = []

    # The ranges are walked once per item; a one-shot iterator would be
    # exhausted after the first item and silently yield no further counts.
    date_ranges = list(date_ranges)

    if not query_list:
        counts = search_counts['Counts'] = []
        for from_date, to_date in date_ranges:
            expression = get_expression(
                text_terms=text_words,
                mesh_terms=mesh_terms,
                from_date=from_date,
                to_date=to_date)
            count = get_search_count(expression)
            counts.append(count)

            # Log is printed to standard output and file
            print(expression)
            print(str(count) + '\n')
            queries.append(expression + '\n' + str(count))
        return dict(
            search_counts=search_counts,
            queries='\n\n'.join(queries),
            query_totals='\n\n'.join(query_totals))

    name_key = 'journal_name' if search_journals else 'author_name'

    # O(n*y) for n=len(query_list) and y=len(date_ranges)
    for item in query_list:
        total = 'Total Article Count [' + item + ']'
        partial = 'Keyword Article Count [' + item + ']'
        # Insertion order (keyword column first) is kept for consumers that
        # rely on the column order of the resulting mapping.
        search_counts[partial] = []
        search_counts[total] = []
        name_filter = {name_key: item}

        for from_date, to_date in date_ranges:
            # Query totals (w/o keywords)
            item_expression = get_expression(
                from_date=from_date, to_date=to_date, **name_filter)
            item_count = get_search_count(item_expression)
            search_counts[total].append(item_count)

            print('Total - ' + item_expression)
            print(str(item_count) + '\n')
            query_totals.append(
                'Total - ' + item_expression + '\n' + str(item_count))

            # Get search count data for each Query (w/ keywords)
            expression = get_expression(
                text_terms=text_words,
                mesh_terms=mesh_terms,
                from_date=from_date,
                to_date=to_date,
                **name_filter)
            count = get_search_count(expression)
            search_counts[partial].append(count)

            # Log is printed to standard output and file
            print(expression)
            print(str(count) + '\n')
            queries.append(expression + '\n' + str(count))

    return dict(
        search_counts=search_counts,
        queries='\n\n'.join(queries),
        query_totals='\n\n'.join(query_totals))
def test_retstart():
    """Check that two ``retmax`` values give result lists of equal length.

    The same author search is run with ``retmax=20`` and ``retmax=1000`` and
    the lengths of the two returned lists are compared.
    """
    expression = 'Reshma%20Jagsi[author]'
    small_limit_articles, large_limit_articles = [
        get_search_count(expression, retmax=limit) for limit in (20, 1000)
    ]
    assert len(small_limit_articles) == len(large_limit_articles)