def merge_altmetric_plumx_all(alt_folder, plu_folder, dst_folder):
    """Run ``merge_altmetric_plumx`` for every ``xlsx`` file in ``alt_folder``.

    File names are taken from ``alt_folder``. For each name the same-named
    paths under ``plu_folder`` and ``dst_folder`` are built and handed to
    ``merge_altmetric_plumx`` together with fixed column lists.

    Args:
        alt_folder: Folder that is listed to discover the file names.
        plu_folder: Folder expected to contain files with the same names.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # Every directory entry is echoed, but only names ending in 'xlsx'
        # are processed.
        if not file_.endswith('xlsx'):
            continue
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        alt_file_url = os.path.join(alt_folder, file_)
        plu_file_url = os.path.join(plu_folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        merge_altmetric_plumx(
            alt_file_url,
            plu_file_url,
            dst_url,
            alt_columns=[
                'TC', 'twitter', 'facebook', 'wikipedia', 'redditors', 'f1000'
            ],
            plu_columns=[
                'twitter', 'facebook', 'reference_count_wikipedia',
                'comment_count_reddit'
            ],
            merge_on=['SO', 'DI'])
def merge_plumx_elsevier_springer_views_all(plu_folder, els_folder, spr_folder,
                                            dst_folder):
    """Run ``merge_plumx_elsevier_springer_views`` per ``xlsx`` file.

    File names are taken from ``plu_folder``. For each name the same-named
    paths under ``els_folder``, ``spr_folder`` and ``dst_folder`` are built and
    handed to ``merge_plumx_elsevier_springer_views`` with fixed column lists.

    Args:
        plu_folder: Folder that is listed to discover the file names.
        els_folder: Folder expected to contain files with the same names.
        spr_folder: Folder expected to contain files with the same names.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(plu_folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        plu_file_url = os.path.join(plu_folder, file_)
        els_file_url = os.path.join(els_folder, file_)
        spr_file_url = os.path.join(spr_folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        # NOTE(review): the column names below are passed through verbatim;
        # spellings such as 'abstruct_views' and 'citationsx' presumably match
        # the headers in the data files -- verify before "correcting" them.
        merge_plumx_elsevier_springer_views(
            plu_file_url,
            els_file_url,
            spr_file_url,
            dst_url,
            plu_columns=['abstruct_views', 'full_text_views', 'exports_saves'],
            els_columns=['views', 'citations'],
            spr_columns=['downloads', 'citationsx'],
            merge_on=['SO', 'DI'])
def merge_altmetric_plumx_plos_all(alt_folder, plu_folder, plos_folder,
                                   dst_folder):
    """Run ``merge_altmetric_plumx_plos`` per ``xlsx``/``csv`` file.

    File names are taken from ``alt_folder``. For each name the same-named
    paths under ``plu_folder``, ``plos_folder`` and ``dst_folder`` are built
    and handed to ``merge_altmetric_plumx_plos`` with fixed column lists.

    Args:
        alt_folder: Folder that is listed to discover the file names.
        plu_folder: Folder expected to contain files with the same names.
        plos_folder: Folder expected to contain files with the same names.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # endswith accepts a tuple, so one call covers both extensions.
        if not file_.endswith(('xlsx', 'csv')):
            continue
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        alt_file_url = os.path.join(alt_folder, file_)
        plu_file_url = os.path.join(plu_folder, file_)
        plos_file_url = os.path.join(plos_folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        # NOTE(review): column names are kept byte-for-byte, including the
        # trailing space in 'Alt_Wikipedia_Pages ' and the 'soical_' prefix;
        # presumably they mirror the headers in the data -- verify.
        merge_altmetric_plumx_plos(
            alt_file_url,
            plu_file_url,
            plos_file_url,
            dst_url,
            alt_columns=[
                'Alt_Tweeters', 'Alt_Facebook_Pages', 'Alt_Wikipedia_Pages ',
                'Alt_Redditors'
            ],
            plu_columns=['soical_FACEBOOK_COUNT', 'soical_TWEET_COUNT'],
            plos_columns=['Twitter Total', 'Wikipedia Total'],
            merge_on=['SO', 'DI'])
def grab_detail_id_altmetric_all(folder, dst_folder, doi_column):
    """Run ``grab_detail_id_altmetric`` for every entry in ``folder``.

    Each directory entry is processed (no extension filter); the output path
    is the same file name under ``dst_folder``.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        doi_column: Forwarded unchanged to ``grab_detail_id_altmetric``.
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_detail_id_altmetric(file_url, dst_url, doi_column)
def grab_springer_all(folder, dst_folder, doi_column):
    """Run ``grab_springer_info`` for entries of ``folder`` not yet in output.

    A file whose name already appears in ``dst_folder`` when the function
    starts is skipped, so an interrupted run can be resumed.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        doi_column: Forwarded unchanged to ``grab_springer_info``.
    """
    check_file_url(dst_folder)
    # Snapshot of existing outputs; a set gives O(1) membership tests.
    finished = set(os.listdir(dst_folder))
    for file_ in os.listdir(folder):
        if file_ in finished:
            continue
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_springer_info(file_url, dst_url, doi_column)


# Example single-file call:
# grab_springer_info('data/outputs/or64_elsevier.xlsx', 'data/outputs/or64_elsevier_y.xlsx', 'DOI')
def cal_file_folder(folder, dst_folder, col_names):
    """Run ``cal_file`` for every entry in ``folder``.

    When the output folder differs from the input folder, files whose names
    already exist in ``dst_folder`` at start-up are skipped. When both refer
    to the same folder every file is processed (in-place mode).

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        col_names: Forwarded unchanged to ``cal_file``.
    """
    check_file_url(dst_folder)
    # Compare normalised paths so that 'out/' and 'out' count as the same
    # folder; a raw string comparison would treat them as different and then
    # skip every file because each one "already exists" in the destination.
    in_place = os.path.normpath(folder) == os.path.normpath(dst_folder)
    # Snapshot of existing outputs; a set gives O(1) membership tests.
    finished = set(os.listdir(dst_folder))
    for file_ in os.listdir(folder):
        if not in_place and file_ in finished:
            continue
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        cal_file(file_url, dst_url, col_names)
def grab_detail_altmetric_all(folder, dst_folder, citation_id_column):
    """Run ``grab_detail_altmetric`` for entries of ``folder`` not yet done.

    A file whose name already appears in ``dst_folder`` when the function
    starts is skipped, so an interrupted run can be resumed.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        citation_id_column: Forwarded unchanged to ``grab_detail_altmetric``.
    """
    check_file_url(dst_folder)
    # Snapshot of existing outputs; a set gives O(1) membership tests.
    finished = set(os.listdir(dst_folder))
    for file_ in os.listdir(folder):
        if file_ in finished:
            continue
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_detail_altmetric(file_url, dst_url, citation_id_column)
def grab_scopus_eid_all(folder, dst_folder, doi_column):
    """Run ``grab_scopus_eid`` for entries of ``folder`` not yet in output.

    A file whose name already appears in ``dst_folder`` when the function
    starts is skipped, so an interrupted run can be resumed.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        doi_column: Forwarded unchanged to ``grab_scopus_eid``.
    """
    check_file_url(dst_folder)
    # Snapshot of existing outputs; a set gives O(1) membership tests.
    finished = set(os.listdir(dst_folder))
    for file_ in os.listdir(folder):
        if file_ in finished:
            continue
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_scopus_eid(file_url, dst_url, doi_column)
def cal_file_folder_sub_impact_df(folder, dst_folder, dst_f_name, tc='TC'):
    """Combine ``cal_file_sub_impact`` results for a folder into one workbook.

    ``cal_file_sub_impact`` is called for every entry in ``folder``; the
    returned frames are stacked in listing order (original row indices are
    kept during stacking) and written with ``to_excel(..., index=False)``.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output file is written into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        dst_f_name: File name of the combined workbook inside ``dst_folder``.
        tc: Forwarded to ``cal_file_sub_impact`` as its ``tc`` keyword.
    """
    check_file_url(dst_folder)
    frames = []
    for file_ in os.listdir(folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        frames.append(cal_file_sub_impact(file_url, tc=tc))
    # One concat instead of DataFrame.append per file: append re-copies the
    # accumulated data on every call and no longer exists in pandas >= 2.0.
    # pd.concat rejects an empty list, so fall back to an empty frame to keep
    # writing an (empty) workbook when the folder has no entries.
    df_save = pd.concat(frames) if frames else pd.DataFrame()
    dst_url = os.path.join(dst_folder, dst_f_name)
    df_save.to_excel(dst_url, index=False)
def grab_altmetric_total_score_all(folder, dst_folder, doi_column):
    """Run ``grab_altmetric_total_score`` for every entry in ``folder``.

    When the output folder differs from the input folder, files whose names
    already exist in ``dst_folder`` at start-up are skipped. When both refer
    to the same folder every file is processed (in-place mode).

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        doi_column: Forwarded unchanged to ``grab_altmetric_total_score``.
    """
    check_file_url(dst_folder)
    # Compare normalised paths so that 'out/' and 'out' count as the same
    # folder; a raw string comparison would treat them as different and then
    # skip every file because each one "already exists" in the destination.
    in_place = os.path.normpath(folder) == os.path.normpath(dst_folder)
    # Snapshot of existing outputs; a set gives O(1) membership tests.
    finished = set(os.listdir(dst_folder))
    for file_ in os.listdir(folder):
        if not in_place and file_ in finished:
            continue
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_altmetric_total_score(file_url, dst_url, doi_column)


# Example folder-level calls:
# gadc.grab_detail_id_altmetric_all(u'data/outputs/数学期刊(下)/'.encode('utf-8'), u'data/outputs/数学期刊(下)/'.encode('utf-8'), 'DI')
# gadc.grab_detail_altmetric_all(u'data/outputs/数学期刊(下)/'.encode('utf-8'), u'data/outputs/数学期刊(下)_altmetric/'.encode('utf-8'), 'citation_id')
def grab_mendeley_views_all(folder, dst_folder, eid_column):
    """Run ``grab_mendeley_views`` for every ``xlsx`` file in ``folder``.

    The output path for each file is the same file name under ``dst_folder``.

    Args:
        folder: Folder that is listed for input files.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
        eid_column: Forwarded unchanged to ``grab_mendeley_views``.
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        # Every directory entry is echoed, but only names ending in 'xlsx'
        # are processed.
        if not file_.endswith('xlsx'):
            continue
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        file_url = os.path.join(folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        grab_mendeley_views(file_url, dst_url, eid_column)


# Example calls:
# grab_mendeley_views('data/outputs/or64_elsevier.xlsx', 'data/outputs/or64_elsevier_x.xlsx', 'eid', end_year=2018)
# ge.grab_scopus_eid_all(u'data/outputs/能源化工元源数据文件/'.encode('utf-8'), u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), 'DI')
# ge.grab_mendeley_views_all(u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), u'data/outputs/能源化工元源数据文件_elsevier/'.encode('utf-8'), 'eid')
def merge_all(alt_folder, plu_folder, els_folder, spr_folder, dst_folder):
    """Run ``merge_alt_plu_els_spr`` for every ``xlsx`` file in ``alt_folder``.

    File names are taken from ``alt_folder``. For each name the same-named
    paths under the other three input folders and ``dst_folder`` are built and
    handed to ``merge_alt_plu_els_spr`` with ``merge_on=['SO', 'DI']``.

    Args:
        alt_folder: Folder that is listed to discover the file names.
        plu_folder: Folder expected to contain files with the same names.
        els_folder: Folder expected to contain files with the same names.
        spr_folder: Folder expected to contain files with the same names.
        dst_folder: Folder the output paths point into. It is passed to
            ``check_file_url`` first (presumably to make sure it exists;
            confirm against that helper).
    """
    check_file_url(dst_folder)
    for file_ in os.listdir(alt_folder):
        # Single-argument parenthesised form prints the same text under the
        # Python 2 print statement and stays valid on Python 3.
        print(file_)
        if not file_.endswith('xlsx'):
            continue
        # os.path.join replaces the hand-written separator handling and does
        # not need str(), which fails on non-ASCII unicode paths in Python 2.
        alt_file_url = os.path.join(alt_folder, file_)
        plu_file_url = os.path.join(plu_folder, file_)
        els_file_url = os.path.join(els_folder, file_)
        spr_file_url = os.path.join(spr_folder, file_)
        dst_url = os.path.join(dst_folder, file_)
        merge_alt_plu_els_spr(
            alt_file_url,
            plu_file_url,
            els_file_url,
            spr_file_url,
            dst_url,
            merge_on=['SO', 'DI'])