def merge_altmetric_plumx_all(alt_folder, plu_folder, dst_folder):
    """Run ``merge_altmetric_plumx`` for every ``xlsx`` entry of a folder.

    ``alt_folder`` is listed; each entry whose name ends with ``xlsx`` is
    paired with the same file name under ``plu_folder`` and ``dst_folder``
    and the three paths are handed to ``merge_altmetric_plumx`` together
    with fixed column lists and the merge keys ``['SO', 'DI']``.

    Every listed entry name is printed, including the ones that are then
    skipped by the extension filter.

    Args:
        alt_folder: Directory that is listed to discover the file names.
        plu_folder: Directory joined with each file name to form the second
            input path.
        dst_folder: Directory joined with each file name to form the
            destination path.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        # os.path.join inserts the separator only when it is missing.
        merge_altmetric_plumx(
            os.path.join(alt_folder, file_),
            os.path.join(plu_folder, file_),
            os.path.join(dst_folder, file_),
            alt_columns=[
                'TC', 'twitter', 'facebook', 'wikipedia',
                'redditors', 'f1000'
            ],
            plu_columns=[
                'twitter', 'facebook',
                'reference_count_wikipedia',
                'comment_count_reddit'
            ],
            merge_on=['SO', 'DI'])
def merge_plumx_elsevier_springer_views_all(plu_folder, els_folder, spr_folder,
                                            dst_folder):
    """Run ``merge_plumx_elsevier_springer_views`` per ``xlsx`` entry.

    ``plu_folder`` is listed; each entry whose name ends with ``xlsx`` is
    combined with ``plu_folder``, ``els_folder``, ``spr_folder`` and
    ``dst_folder`` to build the four paths passed to
    ``merge_plumx_elsevier_springer_views``, along with fixed column lists
    and the merge keys ``['SO', 'DI']``.

    Every listed entry name is printed, including skipped ones.

    Args:
        plu_folder: Directory that is listed to discover the file names.
        els_folder: Directory joined with each file name (second input).
        spr_folder: Directory joined with each file name (third input).
        dst_folder: Directory joined with each file name (destination).
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(plu_folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        # Column names are passed through verbatim; the unusual spellings
        # ('abstruct_views', 'citationsx') are kept exactly as they were.
        merge_plumx_elsevier_springer_views(
            os.path.join(plu_folder, file_),
            os.path.join(els_folder, file_),
            os.path.join(spr_folder, file_),
            os.path.join(dst_folder, file_),
            plu_columns=['abstruct_views', 'full_text_views', 'exports_saves'],
            els_columns=['views', 'citations'],
            spr_columns=['downloads', 'citationsx'],
            merge_on=['SO', 'DI'])
def merge_altmetric_plumx_plos_all(alt_folder, plu_folder, plos_folder,
                                   dst_folder):
    """Run ``merge_altmetric_plumx_plos`` per ``xlsx``/``csv`` entry.

    ``alt_folder`` is listed; each entry whose name ends with ``xlsx`` or
    ``csv`` is combined with ``alt_folder``, ``plu_folder``,
    ``plos_folder`` and ``dst_folder`` to build the four paths passed to
    ``merge_altmetric_plumx_plos``, along with fixed column lists and the
    merge keys ``['SO', 'DI']``.

    Every listed entry name is printed, including skipped ones.

    Args:
        alt_folder: Directory that is listed to discover the file names.
        plu_folder: Directory joined with each file name (second input).
        plos_folder: Directory joined with each file name (third input).
        dst_folder: Directory joined with each file name (destination).
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        # endswith accepts a tuple of candidate suffixes.
        if not file_.endswith(('xlsx', 'csv')):
            continue
        merge_altmetric_plumx_plos(
            os.path.join(alt_folder, file_),
            os.path.join(plu_folder, file_),
            os.path.join(plos_folder, file_),
            os.path.join(dst_folder, file_),
            # NOTE(review): 'Alt_Wikipedia_Pages ' carries a trailing space;
            # kept as-is -- confirm it matches the real column header.
            alt_columns=[
                'Alt_Tweeters', 'Alt_Facebook_Pages', 'Alt_Wikipedia_Pages ',
                'Alt_Redditors'
            ],
            plu_columns=['soical_FACEBOOK_COUNT', 'soical_TWEET_COUNT'],
            plos_columns=['Twitter Total', 'Wikipedia Total'],
            merge_on=['SO', 'DI'])
def grab_detail_id_altmetric_all(folder, dst_folder, doi_column):
    """Call ``grab_detail_id_altmetric`` for every entry of ``folder``.

    No extension filter and no "already done" check is applied: each name
    returned by ``os.listdir(folder)`` is printed and processed.

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path.
        doi_column: Passed through unchanged to
            ``grab_detail_id_altmetric``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        grab_detail_id_altmetric(os.path.join(folder, file_),
                                 os.path.join(dst_folder, file_),
                                 doi_column)
def grab_springer_all(folder, dst_folder, doi_column):
    """Call ``grab_springer_info`` for each entry of ``folder`` not yet done.

    An entry counts as done when an entry with the same name is already
    listed in ``dst_folder`` at the time this function starts; such entries
    are skipped silently (they are not printed).

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path; its listing is the "already done" set.
        doi_column: Passed through unchanged to ``grab_springer_info``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    file_list = os.listdir(folder)
    # Snapshot taken once; a set keeps the per-file membership test O(1).
    finished = set(os.listdir(dst_folder))
    for file_ in file_list:
        if file_ in finished:
            continue
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        grab_springer_info(os.path.join(folder, file_),
                           os.path.join(dst_folder, file_),
                           doi_column)
# grab_springer_info('data/outputs/or64_elsevier.xlsx', 'data/outputs/or64_elsevier_y.xlsx', 'DOI')
def cal_file_folder(folder, dst_folder, col_names):
    """Call ``cal_file`` for each entry of ``folder``.

    When ``folder`` and ``dst_folder`` differ, entries whose name is already
    listed in ``dst_folder`` are skipped silently. When both arguments are
    equal, every entry is processed (source and destination path coincide).

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path.
        col_names: Passed through unchanged to ``cal_file``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    file_list = os.listdir(folder)
    # Snapshot taken once; a set keeps the per-file membership test O(1).
    finished = set(os.listdir(dst_folder))
    # Loop-invariant: skipping only makes sense for distinct folders,
    # otherwise every source file would look "finished".
    skip_finished = folder != dst_folder
    for file_ in file_list:
        if skip_finished and file_ in finished:
            continue
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        cal_file(os.path.join(folder, file_),
                 os.path.join(dst_folder, file_),
                 col_names)
def grab_detail_altmetric_all(folder, dst_folder, citation_id_column):
    """Call ``grab_detail_altmetric`` for each entry of ``folder`` not done.

    An entry counts as done when an entry with the same name is already
    listed in ``dst_folder`` at the time this function starts; such entries
    are skipped silently (they are not printed).

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path; its listing is the "already done" set.
        citation_id_column: Passed through unchanged to
            ``grab_detail_altmetric``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    file_list = os.listdir(folder)
    # Snapshot taken once; a set keeps the per-file membership test O(1).
    finished = set(os.listdir(dst_folder))
    for file_ in file_list:
        if file_ in finished:
            continue
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        grab_detail_altmetric(os.path.join(folder, file_),
                              os.path.join(dst_folder, file_),
                              citation_id_column)
# Example #8
# 0
def grab_scopus_eid_all(folder, dst_folder, doi_column):
    """Call ``grab_scopus_eid`` for each entry of ``folder`` not yet done.

    An entry counts as done when an entry with the same name is already
    listed in ``dst_folder`` at the time this function starts; such entries
    are skipped silently (they are not printed).

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path; its listing is the "already done" set.
        doi_column: Passed through unchanged to ``grab_scopus_eid``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    file_list = os.listdir(folder)
    # Snapshot taken once; a set keeps the per-file membership test O(1).
    finished = set(os.listdir(dst_folder))
    for file_ in file_list:
        if file_ in finished:
            continue
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        grab_scopus_eid(os.path.join(folder, file_),
                        os.path.join(dst_folder, file_),
                        doi_column)
def cal_file_folder_sub_impact_df(folder, dst_folder, dst_f_name, tc='TC'):
    """Stack the per-file results of ``cal_file_sub_impact`` into one sheet.

    ``cal_file_sub_impact`` is called for every entry of ``folder``; the
    returned frames are concatenated in listing order and written with
    ``to_excel(..., index=False)`` to ``dst_f_name`` inside ``dst_folder``.
    If ``folder`` has no entries an empty DataFrame is written.

    Args:
        folder: Directory that is listed; joined with each name to form the
            path given to ``cal_file_sub_impact``.
        dst_folder: Directory joined with ``dst_f_name`` to form the output
            path.
        dst_f_name: File name of the combined output workbook.
        tc: Forwarded to ``cal_file_sub_impact`` as the ``tc`` keyword.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)

    # Collect first and concatenate once: DataFrame.append copied the whole
    # accumulated frame on every iteration and no longer exists in
    # pandas >= 2.0.
    # NOTE(review): assumes cal_file_sub_impact returns a DataFrame -- confirm.
    frames = []
    for file_ in os.listdir(folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        frames.append(cal_file_sub_impact(os.path.join(folder, file_), tc=tc))

    # pd.concat rejects an empty list, so keep the empty-frame fallback.
    df_save = pd.concat(frames) if frames else pd.DataFrame()
    df_save.to_excel(os.path.join(dst_folder, dst_f_name), index=False)
def grab_altmetric_total_score_all(folder, dst_folder, doi_column):
    """Call ``grab_altmetric_total_score`` for each entry of ``folder``.

    When ``folder`` and ``dst_folder`` differ, entries whose name is already
    listed in ``dst_folder`` are skipped silently. When both arguments are
    equal, every entry is processed (source and destination path coincide).

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path.
        doi_column: Passed through unchanged to
            ``grab_altmetric_total_score``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    file_list = os.listdir(folder)
    # Snapshot taken once; a set keeps the per-file membership test O(1).
    finished = set(os.listdir(dst_folder))
    # Loop-invariant: skipping only makes sense for distinct folders,
    # otherwise every source file would look "finished".
    skip_finished = folder != dst_folder
    for file_ in file_list:
        if skip_finished and file_ in finished:
            continue
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        grab_altmetric_total_score(os.path.join(folder, file_),
                                   os.path.join(dst_folder, file_),
                                   doi_column)


# gadc.grab_detail_id_altmetric_all(u'data/outputs/数学期刊(下)/'.encode('utf-8'), u'data/outputs/数学期刊(下)/'.encode('utf-8'), 'DI')
# gadc.grab_detail_altmetric_all(u'data/outputs/数学期刊(下)/'.encode('utf-8'), u'data/outputs/数学期刊(下)_altmetric/'.encode('utf-8'), 'citation_id')
# Example #11
# 0
def grab_mendeley_views_all(folder, dst_folder, eid_column):
    """Call ``grab_mendeley_views`` for every ``xlsx`` entry of ``folder``.

    Every listed entry name is printed; only entries whose name ends with
    ``xlsx`` are processed. There is no "already done" check.

    Args:
        folder: Directory that is listed; joined with each name to form the
            source path.
        dst_folder: Directory joined with each name to form the destination
            path.
        eid_column: Passed through unchanged to ``grab_mendeley_views``.
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        grab_mendeley_views(os.path.join(folder, file_),
                            os.path.join(dst_folder, file_),
                            eid_column)


# grab_mendeley_views('data/outputs/or64_elsevier.xlsx', 'data/outputs/or64_elsevier_x.xlsx', 'eid', end_year=2018)
# ge.grab_scopus_eid_all(u'data/outputs/能源化工元源数据文件/'.encode('utf-8'), u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), 'DI')
# ge.grab_mendeley_views_all(u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), 'eid')
# Example #12
# 0
def merge_all(alt_folder, plu_folder, els_folder, spr_folder, dst_folder):
    """Run ``merge_alt_plu_els_spr`` for every ``xlsx`` entry of a folder.

    ``alt_folder`` is listed; each entry whose name ends with ``xlsx`` is
    combined with the four input folders and ``dst_folder`` to build the
    five paths passed to ``merge_alt_plu_els_spr`` with the merge keys
    ``['SO', 'DI']``.

    Every listed entry name is printed, including skipped ones.

    Args:
        alt_folder: Directory that is listed to discover the file names.
        plu_folder: Directory joined with each file name (second input).
        els_folder: Directory joined with each file name (third input).
        spr_folder: Directory joined with each file name (fourth input).
        dst_folder: Directory joined with each file name (destination).
    """
    # NOTE(review): check_file_url is defined elsewhere; presumably it
    # prepares/validates the destination folder -- confirm.
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # print(x) with one argument behaves the same on Python 2 and 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        # os.path.join inserts the separator only when it is missing.
        merge_alt_plu_els_spr(os.path.join(alt_folder, file_),
                              os.path.join(plu_folder, file_),
                              os.path.join(els_folder, file_),
                              os.path.join(spr_folder, file_),
                              os.path.join(dst_folder, file_),
                              merge_on=['SO', 'DI'])