Exemplo n.º 1
0
def get_x_from_y(input_file, output_file, get_func, extract_func):
    """Run the load -> extract -> fetch -> write pipeline between two files.

    Records are loaded from ``input_file`` with ``load_json_lines``, passed
    through ``extract_func``, handed to ``get_infos`` together with
    ``get_func``, and the result is written to ``output_file`` with
    ``write_json_lines``. Both the loaded stream and the result stream are
    wrapped in ``log_stream`` (named 'Input' and 'Output' respectively).

    Args:
        input_file: Source passed to ``load_json_lines``.
        output_file: Destination passed to ``write_json_lines``.
        get_func: Forwarded to ``get_infos`` as its ``get_func`` keyword.
        extract_func: Callable applied to the logged input stream.
    """
    records_in = log_stream(load_json_lines(input_file), name='Input')
    extracted = extract_func(records_in)
    records_out = log_stream(get_infos(extracted, get_func=get_func),
                             name='Output')
    write_json_lines(records_out, output_file)
Exemplo n.º 2
0
def get_citing_papers_info():
    """Look up info for citing papers and write it to a timestamped file.

    The output name comes from ``output_filename_pattern2`` with ``date``
    set to the current local time formatted as ``%y%m%d_%H%M``. Papers are
    read from ``input_filename2``, fed to ``get_citing_papers`` and then to
    ``get_paper_infos2``; the result goes to ``write_json_lines``.
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    target = output_filename_pattern2.format(date=timestamp)
    citing_ids = get_citing_papers(load_json_lines(input_filename2))
    write_json_lines(get_paper_infos2(citing_ids), target)
Exemplo n.º 3
0
def get_affiliation_info():
    """Fetch info for each distinct affiliation id and write it to a file.

    Author records from ``aff_input1`` and ``aff_input2`` are chained into
    one stream, ``get_aff`` extracts ids from it, duplicates are dropped via
    a set, and ``get_aff_infos`` is called on the resulting list. Output is
    written to ``aff_pattern`` formatted with the current local time
    (``%y%m%d_%H%M``).
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    target = aff_pattern.format(date=timestamp)
    authors = chain(load_json_lines(aff_input1), load_json_lines(aff_input2))
    # Round-trip through a set to drop duplicate ids (order is not kept).
    distinct_ids = list(set(get_aff(authors)))
    write_json_lines(get_aff_infos(distinct_ids), target)
Exemplo n.º 4
0
def get_author_info():
    """Fetch info for each distinct author id and write it to a file.

    Papers are read from ``author_input2``, ``get_authors`` extracts ids,
    duplicates are dropped via a set, and ``get_author_infos`` is called on
    the resulting list. Output is written to ``author_filename_pattern2``
    formatted with the current local time (``%y%m%d_%H%M``).
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    target = author_filename_pattern2.format(date=timestamp)
    # Round-trip through a set to drop duplicate ids (order is not kept).
    distinct_ids = list(set(get_authors(load_json_lines(author_input2))))
    write_json_lines(get_author_infos(distinct_ids), target)
Exemplo n.º 5
0
def get_ms_ac_info1():
    """Fetch paper info for papers that have at least one matched entity.

    Papers are read from ``input_filename1``; only those whose
    ``p['ms_academic']['entities']`` has non-zero length are passed on to
    ``get_paper_infos``. Output is written to ``output_filename_pattern1``
    formatted with the current local time (``%y%m%d_%H%M``).
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    target = output_filename_pattern1.format(date=timestamp)
    all_papers = load_json_lines(input_filename1)
    # Lazy filter: skip papers whose entity list is empty.
    with_entities = (paper for paper in all_papers
                     if len(paper['ms_academic']['entities']))
    write_json_lines(get_paper_infos(with_entities), target)
Exemplo n.º 6
0
def add_ms_ac_info(input_file, output_file):
    """Attach an 'ms_academic' lookup result to each paper and write it out.

    For every paper loaded from ``input_file`` a query of the form
    ``and(Ti='<normalized title>',Y>=2014)`` is sent through
    ``get_mc_ac_paper`` and stored under the 'ms_academic' key. The paper's
    own fields are merged in afterwards, so an existing 'ms_academic' field
    on the input record takes precedence over the fresh lookup.

    Args:
        input_file: Source passed to ``load_json_lines``.
        output_file: Destination passed to ``write_json_lines``.
    """

    def enrich(paper):
        # 'ms_academic' is inserted first so it leads the key order; the
        # paper's own keys follow and override on collision.
        query = "and(Ti='" + normalize_title(paper['title']) + "',Y>=2014)"
        merged = {'ms_academic': get_mc_ac_paper(expr=query)}
        merged.update(paper)
        return merged

    source = log_stream(load_json_lines(input_file), name='Input')
    # ``delay`` wraps the stream with an argument of 2 (presumably a
    # throttle between items -- confirm against its definition).
    enriched = (enrich(paper) for paper in delay(source, 2))
    write_json_lines(log_stream(enriched, name='Output'), output_file)
Exemplo n.º 7
0
def main():
    """Generate search queries, collect titles, and write them to a file.

    Queries are built by ``gen_queries`` from the module-level settings
    (``keywords``, ``pages``, ``min_year``, ``add_quotes``,
    ``add_combinations``, ``title_per_page``), resolved by ``find_titles``,
    and written to ``output_filename_pattern`` formatted with the current
    local time (``%y%m%d_%H%M``).
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d_%H%M")
    target = output_filename_pattern.format(date=timestamp)
    query_options = {
        'add_quotes': add_quotes,
        'add_combinations': add_combinations,
        'title_per_page': title_per_page,
    }
    search_queries = gen_queries(keywords, pages, min_year, **query_options)
    write_json_lines(find_titles(search_queries), target)
Exemplo n.º 8
0
def add_ms_ac_info():
    """Attach an 'ms_academic' lookup result to each paper and write it out.

    Papers are read from ``input_filename``. Each title is lower-cased,
    every occurrence of one of ``: ' , ? ! . -`` (plus one optional
    following space) is replaced by a single space, and one trailing space
    is removed. The cleaned title is embedded in a query of the form
    ``and(Ti='<title>',Y>=2014)`` and sent through ``get_ms_ac_paper``.

    The lookup result is stored under 'ms_academic'; the paper's own fields
    are merged in afterwards, so an existing 'ms_academic' field on the
    input record takes precedence. Output is written to
    ``output_filename_pattern`` formatted with the current local time
    (``%y%m%d_%H%M``).
    """
    filename = output_filename_pattern.format(
        date=datetime.datetime.now().strftime("%y%m%d_%H%M"))
    papers = load_json_lines(input_filename)

    # Raw strings: the previous non-raw pattern relied on invalid escape
    # sequences (``\:``, ``\,`` ...), which newer Python versions warn about
    # and will eventually reject. The matched character set is unchanged.
    # Compiled once here instead of being looked up for every paper.
    punctuation = re.compile(r"[:',?!.\-] ?")
    trailing_space = re.compile(r" $")

    papers = ({
        'ms_academic':
        get_ms_ac_paper(expr="and(Ti='" + trailing_space.sub(
            "", punctuation.sub(" ", p['title'].lower())) + "',Y>=2014)"),
        **p
    } for p in papers)
    write_json_lines(papers, filename)
Exemplo n.º 9
0
def get_gs(keyword_file, output_file, min_year=2014, pages=20, scope=''):
    """Build quoted search queries from a keyword CSV and save found titles.

    The CSV at ``keyword_file`` is read with pandas; rows whose 'include'
    column is null are dropped. Each remaining 'keyword' value is turned
    into the string ``"<keyword>" "<scope>"`` and the list is passed to
    ``gen_queries`` together with ``pages`` and ``min_year`` (both coerced
    to ``int``) and the module-level ``title_per_page``. The queries are
    resolved by ``find_titles`` and written via ``write_json_lines``.

    Args:
        keyword_file: CSV path with 'keyword' and 'include' columns.
        output_file: Destination passed to ``write_json_lines``.
        min_year: Forwarded to ``gen_queries`` after ``int()`` conversion.
        pages: Forwarded to ``gen_queries`` after ``int()`` conversion.
        scope: Extra term appended, in double quotes, to every keyword.
    """
    table = pd.read_csv(keyword_file)
    included = table['include'].notnull()
    selected = table.loc[included, 'keyword']

    # Element-wise string concatenation on the Series: "<kw>" "<scope>".
    quoted = '"' + selected + '" "' + scope + '"'
    search_terms = quoted.tolist()

    search_queries = gen_queries(search_terms,
                                 int(pages),
                                 int(min_year),
                                 title_per_page=title_per_page)
    write_json_lines(find_titles(search_queries), output_file)