Exemplo n.º 1
0
def run(target_folder,
        journals_path=None,
        authors_path=None,
        keywords_path=None,
        mesh_terms_path=None,
        from_date=None,
        to_date=None,
        date_interval_in_years=None):
    """Tabulate search counts per date range and save them in target_folder.

    Loads the journal, author, keyword and MeSH-term lists from the given
    paths, builds the list of date ranges, runs ``tabulate`` and writes two
    files into ``target_folder``:

    * ``results.csv`` -- the search counts, indexed by date range.
    * ``log_results.txt`` -- the queries followed by the query totals.

    Both output paths are then printed as ``name = path`` lines, which the
    crosscompute tool requires.

    Journals take precedence over authors: the author list is only queried
    when no journals were loaded.

    Raises:
        SystemExit: with the message ``to_date.error = <reason>`` when
            ``get_date_ranges`` raises ``ToolError``.
    """
    target_path = join(target_folder, 'results.csv')
    # Input retrieval
    journals = load_unique_lines(journals_path)
    text_words = load_unique_lines(keywords_path)
    mesh_terms = load_unique_lines(mesh_terms_path)
    authors = load_unique_lines(authors_path)
    # Get list containing tuples of date ranges based on interval
    try:
        date_ranges = get_date_ranges(from_date, to_date,
                                      date_interval_in_years)
    except ToolError as e:
        # Raise SystemExit directly instead of calling exit(): that helper is
        # installed by the site module for interactive sessions and is not
        # guaranteed to exist in every interpreter configuration.
        raise SystemExit('to_date.error = %s' % e)

    search_journals = bool(journals)
    query_list = journals if search_journals else authors

    # Tabulate keywords
    results = tabulate(query_list, date_ranges, text_words, mesh_terms,
                       search_journals)
    search_counts = results['search_counts']
    queries = results['queries']
    query_totals = results['query_totals']

    # Output setup
    log_path = join(target_folder, 'log_results.txt')
    with open(log_path, 'w') as f:
        f.write(queries)
        f.write(query_totals)
    # Label each row with the first 10 characters of the start and end of
    # its range, e.g. 'YYYY-MM-DD to YYYY-MM-DD' when str() of the bounds
    # starts with an ISO date (presumably datetime objects -- confirm in
    # get_date_ranges).
    dates_index = [str(x)[:10] + ' to ' + str(y)[:10] for x, y in date_ranges]
    results_table = DataFrame(search_counts, index=dates_index)
    results_table.to_csv(target_path)

    # Required print statement for crosscompute tool
    print('results_table_path = ' + target_path)
    print('log_text_path = ' + log_path)
Exemplo n.º 2
0
def run(
        target_folder, journals_path=None, authors_path=None,
        keywords_path=None, mesh_terms_path=None,
        from_date=None, to_date=None, date_interval_in_years=None):
    """Run the keyword tabulation and write its outputs to target_folder.

    Steps, in order:

    1. Load journals, keywords, MeSH terms and authors from their paths
       via ``load_unique_lines``.
    2. Build the date ranges from ``from_date``, ``to_date`` and
       ``date_interval_in_years``.
    3. Call ``tabulate`` on the journals if any were loaded, otherwise on
       the authors.
    4. Write ``log_results.txt`` (queries, then query totals) and
       ``results.csv`` (search counts indexed by date range).
    5. Print both output paths as ``name = path`` lines, as required by
       the crosscompute tool.

    Raises:
        SystemExit: with the message ``to_date.error = <reason>`` when
            ``get_date_ranges`` raises ``ToolError``.
    """
    target_path = join(target_folder, 'results.csv')
    # Input retrieval
    journals = load_unique_lines(journals_path)
    text_words = load_unique_lines(keywords_path)
    mesh_terms = load_unique_lines(mesh_terms_path)
    authors = load_unique_lines(authors_path)
    # Get list containing tuples of date ranges based on interval
    try:
        date_ranges = get_date_ranges(
            from_date, to_date, date_interval_in_years)
    except ToolError as e:
        # exit() is a convenience injected by the site module for the
        # interactive shell; raising SystemExit has the same effect for
        # this message without relying on that helper being present.
        raise SystemExit('to_date.error = %s' % e)

    search_journals = bool(journals)
    query_list = journals if search_journals else authors

    # Tabulate keywords
    results = tabulate(
        query_list, date_ranges, text_words, mesh_terms, search_journals)
    search_counts = results['search_counts']
    queries = results['queries']
    query_totals = results['query_totals']

    # Output setup
    log_path = join(target_folder, 'log_results.txt')
    with open(log_path, 'w') as f:
        f.write(queries)
        f.write(query_totals)
    # One label per range: first 10 characters of each bound's str() form.
    dates_index = [
        str(x)[:10] + ' to ' + str(y)[:10] for x, y in date_ranges]
    results_table = DataFrame(search_counts, index=dates_index)
    results_table.to_csv(target_path)

    # Required print statement for crosscompute tool
    print('results_table_path = ' + target_path)
    print('log_text_path = ' + log_path)
def test_load_unique_lines():
    """load_unique_lines on the lines.txt fixture yields ['one', 'two']."""
    fixture_path = join(TEST_FOLDER, 'lines.txt')
    expected = ['one', 'two']
    assert load_unique_lines(fixture_path) == expected