Пример #1
0
def tabulate_entities(query_list, date_ranges, text_words, mesh_terms, author):
    """Tabulate article counts per entity and per date range.

    For every ``(from_date, to_date)`` pair and every item of ``query_list``
    two expressions are built with ``get_expression``: one restricted only by
    the item and the dates, and one additionally restricted by ``text_words``
    and ``mesh_terms``.  Each expression is handed to ``get_search_count`` and
    the length of what it returns is recorded as the count.

    Args:
        query_list: Names to query; iterated once per date range.  Each name
            is passed to ``get_expression`` as ``author_name`` when ``author``
            is truthy, otherwise as ``journal_name``.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        author: Truthy when the names are authors rather than journals.

    Returns:
        A dict with the keys ``search_counts`` and ``keyword_search_counts``
        (DataFrames indexed by the ``from_date`` timestamps, one column per
        name), ``author_articles`` (defaultdict mapping each name to the
        accumulated results of its keyword-free queries; only filled when
        ``author`` is truthy) and ``log`` (list of ``"expression\\ncount"``
        strings, two per name and date range).
    """
    entry_template = "{query}\n{count}"
    timestamps = []
    log = []
    author_articles = defaultdict(list)
    totals = defaultdict(list)
    keyword_totals = defaultdict(list)

    for from_date, to_date in date_ranges:
        timestamps.append(pd.Timestamp(from_date))
        for name in query_list:
            # Date window plus the author/journal restriction shared by both
            # expressions built for this name.
            restriction = dict(from_date=from_date, to_date=to_date)
            if author:
                restriction['author_name'] = name
            else:
                restriction['journal_name'] = name

            # Query totals (w/o keywords)
            total_expression = get_expression(**restriction)
            total_articles = get_search_count(total_expression)
            total_hits = len(total_articles)

            # Same restriction, narrowed by the keywords
            keyword_expression = get_expression(
                text_terms=text_words, mesh_terms=mesh_terms, **restriction)
            keyword_hits = len(get_search_count(keyword_expression))

            for expression, hits in ((total_expression, total_hits),
                                     (keyword_expression, keyword_hits)):
                log.append(entry_template.format(query=expression, count=hits))

            if author:
                author_articles[name].extend(total_articles)
            totals[name].append(total_hits)
            keyword_totals[name].append(keyword_hits)

    index = pd.Index(timestamps, name='dates')
    return {
        'search_counts': pd.DataFrame(totals, index=index),
        'keyword_search_counts': pd.DataFrame(keyword_totals, index=index),
        'author_articles': author_articles,
        'log': log,
    }
Пример #2
0
def tabulate_keywords(date_ranges, text_words, mesh_terms):
    """Tabulate keyword-only article counts per date range.

    For each ``(from_date, to_date)`` pair an expression is built with
    ``get_expression`` from the keywords and the dates, passed to
    ``get_search_count``, and the length of the result is recorded.

    Args:
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.

    Returns:
        A dict with ``search_counts`` (DataFrame indexed by the ``from_date``
        timestamps with a single ``count`` column; no columns at all when
        ``date_ranges`` is empty) and ``log`` (list of
        ``"expression\\ncount"`` strings, one per date range).
    """
    log = []
    timestamps = []
    # A defaultdict keeps the frame column-less when there are no date ranges.
    columns = defaultdict(list)
    for start, end in date_ranges:
        expression = get_expression(
            text_terms=text_words, mesh_terms=mesh_terms,
            from_date=start, to_date=end)
        hits = len(get_search_count(expression))
        log.append("{query}\n{count}".format(query=expression, count=hits))
        timestamps.append(pd.Timestamp(start))
        columns['count'].append(hits)
    frame = pd.DataFrame(columns, index=pd.Index(timestamps, name='dates'))
    return {'search_counts': frame, 'log': log}
Пример #3
0
def tabulate(query_list, date_ranges, text_words, mesh_terms, search_journals):
    """Collect search counts per date range, optionally per journal/author.

    When ``query_list`` is empty, one keyword-only expression is built per
    date range and its count is stored under ``'Counts'``.  Otherwise, for
    every item and every date range two expressions are built: one without
    the keywords (stored under ``'Total Article Count [item]'``) and one with
    them (stored under ``'Keyword Article Count [item]'``).  Every expression
    and its count are also printed to standard output.

    Args:
        query_list: Sized collection of names (strings); each is passed to
            ``get_expression`` as ``journal_name`` when ``search_journals``
            is truthy, otherwise as ``author_name``.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs; iterated
            once per name.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        search_journals: Truthy to treat the names as journals.

    Returns:
        A dict with ``search_counts`` (dict of label -> list of values
        returned by ``get_search_count``), ``queries`` (keyword-query log
        entries joined by blank lines) and ``query_totals`` (total-query log
        entries joined by blank lines).
    """
    search_counts = {}
    queries = []
    query_totals = []

    if len(query_list) == 0:
        # No names given: a single keyword-only series.
        search_counts['Counts'] = []
        for from_date, to_date in date_ranges:
            keyword_expr = get_expression(text_terms=text_words,
                                          mesh_terms=mesh_terms,
                                          from_date=from_date,
                                          to_date=to_date)
            hits = get_search_count(keyword_expr)
            search_counts['Counts'].append(hits)
            # Echo to stdout; the same text is returned in `queries`.
            print(keyword_expr)
            print(str(hits) + '\n')
            queries.append(keyword_expr + '\n' + str(hits))
    else:
        # O(n*y) for n=len(query_list) and y=len(date_ranges)
        for item in query_list:
            total_label = 'Total Article Count [' + item + ']'
            keyword_label = 'Keyword Article Count [' + item + ']'
            # The keyword series is registered first, then the total series.
            search_counts[keyword_label] = keyword_series = []
            search_counts[total_label] = total_series = []
            for from_date, to_date in date_ranges:
                if search_journals:
                    entity = {'journal_name': item}
                else:
                    entity = {'author_name': item}

                # Query totals (w/o keywords)
                total_expr = get_expression(
                    from_date=from_date, to_date=to_date, **entity)
                total_hits = get_search_count(total_expr)
                total_series.append(total_hits)
                total_line = 'Total - ' + total_expr
                print(total_line)
                print(str(total_hits) + '\n')
                query_totals.append(total_line + '\n' + str(total_hits))

                # Get search count data for each Query (w/ keywords)
                keyword_expr = get_expression(
                    text_terms=text_words, mesh_terms=mesh_terms,
                    from_date=from_date, to_date=to_date, **entity)
                hits = get_search_count(keyword_expr)
                keyword_series.append(hits)
                # Echo to stdout; the same text is returned in `queries`.
                print(keyword_expr)
                print(str(hits) + '\n')
                queries.append(keyword_expr + '\n' + str(hits))

    return {
        'search_counts': search_counts,
        'queries': '\n\n'.join(queries),
        'query_totals': '\n\n'.join(query_totals),
    }
Пример #4
0
def tabulate(query_list, date_ranges, text_words, mesh_terms, isAuthor):
    """Count articles per date range, using the ``articles`` table as a cache.

    Every expression built with ``get_expression`` is first looked up in the
    ``articles`` table (rows of ``query``, ``article``).  Only when no row is
    stored for it is ``get_search_count`` called, and the articles it returns
    are then inserted into the table under that expression.  An expression
    with zero results stores no rows, so it is fetched again on every call.

    Args:
        query_list: Names to query.  When falsy, only keyword queries are
            run.  Otherwise each name is passed to ``get_expression`` as
            ``author_name`` when ``isAuthor`` is truthy, else as
            ``journal_name``.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        isAuthor: Truthy when the names are authors rather than journals.

    Returns:
        A dict with ``search_counts`` (list of tuples whose first element is
        a header row: ``('from', 'to', 'name', 'count', 'count w/ keywords')``
        with names, ``('from', 'to', 'count')`` without) and
        ``author_articles`` (defaultdict mapping each name to the accumulated
        articles of its keyword-free queries; only filled when ``isAuthor``
        is truthy).
    """
    # NOTE(review): `query()` is used as a context manager yielding a cursor
    # with DB-API style execute/executemany and `?` placeholders — presumably
    # sqlite3; confirm against its definition.
    select_sql = "SELECT article from articles where query=?"
    insert_sql = "INSERT INTO articles(query, article) values(?, ?)"
    author_articles = defaultdict(list)
    if query_list:
        # O(n*y) for n=len(query_list) and y=len(date_ranges)
        sc = [('from', 'to', 'name', 'count', 'count w/ keywords')]
        for from_date, to_date in date_ranges:
            for item in query_list:
                query_param = ({'author_name': item} if isAuthor
                               else {'journal_name': item})
                # Query totals (w/o keywords)
                item_expression = get_expression(
                    from_date=from_date, to_date=to_date, **query_param)
                # Same query narrowed by the keywords
                expression = get_expression(
                    text_terms=text_words,
                    mesh_terms=mesh_terms,
                    from_date=from_date, to_date=to_date,
                    **query_param)
                # Look both expressions up in the cache first.
                with query() as cursor:
                    # Cached rows are 1-tuples; keep the article as a string.
                    item_articles = [
                        str(row[0]) for row in cursor.execute(
                            select_sql, (item_expression,)).fetchall()]
                    keyword_articles = cursor.execute(
                        select_sql, (expression,)).fetchall()
                item_count = len(item_articles)
                keyword_count = len(keyword_articles)
                if not item_count:
                    # Cache miss (or zero results): fetch and store.
                    item_articles = get_search_count(item_expression)
                    item_count = len(item_articles)
                    insert_articles = [(item_expression, article)
                                       for article in item_articles]
                    with query() as cursor:
                        cursor.executemany(insert_sql, insert_articles)
                if isAuthor:
                    author_articles[item].extend(item_articles)
                # Get search count data for each Query (w/ keywords)
                if not keyword_count:
                    keyword_articles = get_search_count(expression)
                    keyword_count = len(keyword_articles)
                    insert_articles = [(expression, article)
                                       for article in keyword_articles]
                    with query() as cursor:
                        cursor.executemany(insert_sql, insert_articles)
                sc.append((from_date, to_date, item,
                           item_count, keyword_count))
    else:
        sc = [('from', 'to', 'count')]
        for from_date, to_date in date_ranges:
            expression = get_expression(
                text_terms=text_words, mesh_terms=mesh_terms,
                from_date=from_date, to_date=to_date)
            with query() as cursor:
                count = len(cursor.execute(
                    select_sql, (expression,)).fetchall())
            if not count:
                articles = get_search_count(expression)
                count = len(articles)
                insert_articles = [(expression, article)
                                   for article in articles]
                with query() as cursor:
                    # Insert the rows built just above (the original passed
                    # an undefined name here and raised NameError).
                    cursor.executemany(insert_sql, insert_articles)
            sc.append((from_date, to_date, count))
    return dict(search_counts=sc, author_articles=author_articles)
Пример #5
0
def tabulate(query_list, date_ranges, text_words, mesh_terms, search_journals):
    """Collect search counts per date range, optionally per journal/author.

    With a non-empty ``query_list``, every name gets two series in
    ``search_counts``: ``'Keyword Article Count [name]'`` (queries that
    include the keywords) and ``'Total Article Count [name]'`` (queries
    without them), one value per date range.  With an empty ``query_list`` a
    single ``'Counts'`` series of keyword-only queries is produced.  Each
    expression and its count are printed as they are evaluated.

    Args:
        query_list: Sized collection of names (strings); passed to
            ``get_expression`` as ``journal_name`` when ``search_journals``
            is truthy, otherwise as ``author_name``.
        date_ranges: Iterable of ``(from_date, to_date)`` pairs; iterated
            once per name.
        text_words: Forwarded to ``get_expression`` as ``text_terms``.
        mesh_terms: Forwarded to ``get_expression`` as ``mesh_terms``.
        search_journals: Truthy to treat the names as journals.

    Returns:
        A dict with ``search_counts``, ``queries`` (keyword-query log entries
        joined by blank lines) and ``query_totals`` (total-query log entries
        joined by blank lines).
    """
    search_counts = {}
    queries = []
    query_totals = []

    def run_total_query(series, from_date, to_date, **entity):
        # Query totals (w/o keywords); logged with a 'Total - ' prefix.
        expression = get_expression(
            from_date=from_date, to_date=to_date, **entity)
        hits = get_search_count(expression)
        series.append(hits)
        print('Total - ' + expression)
        print(str(hits) + '\n')
        query_totals.append('Total - ' + expression + '\n' + str(hits))

    def run_keyword_query(series, from_date, to_date, **entity):
        # Keyword-restricted query; echoed to stdout and kept for `queries`.
        expression = get_expression(
            text_terms=text_words, mesh_terms=mesh_terms,
            from_date=from_date, to_date=to_date, **entity)
        hits = get_search_count(expression)
        series.append(hits)
        print(expression)
        print(str(hits) + '\n')
        queries.append(expression + '\n' + str(hits))

    # O(n*y) for n=len(query_list) and y=len(date_ranges)
    if len(query_list) > 0:
        for item in query_list:
            total = 'Total Article Count [' + item + ']'
            partial = 'Keyword Article Count [' + item + ']'
            search_counts[partial] = []
            search_counts[total] = []
            for from_date, to_date in date_ranges:
                entity = ({'journal_name': item} if search_journals
                          else {'author_name': item})
                run_total_query(
                    search_counts[total], from_date, to_date, **entity)
                run_keyword_query(
                    search_counts[partial], from_date, to_date, **entity)
    else:
        search_counts['Counts'] = []
        for from_date, to_date in date_ranges:
            run_keyword_query(search_counts['Counts'], from_date, to_date)

    return {
        'search_counts': search_counts,
        'queries': '\n\n'.join(queries),
        'query_totals': '\n\n'.join(query_totals),
    }
def test_retstart():
    """Check that the number of results does not depend on ``retmax``.

    The same author expression is passed to ``get_search_count`` with
    ``retmax=20`` and with ``retmax=1000``; both calls must return the same
    number of articles.
    """
    # NOTE(review): presumably get_search_count pages through the results
    # (retstart) when retmax is smaller than the total — confirm there.
    expression = 'Reshma%20Jagsi[author]'
    articles_list_20 = get_search_count(expression, retmax=20)
    articles_list_1000 = get_search_count(expression, retmax=1000)
    assert len(articles_list_20) == len(articles_list_1000)