# Citation export setup: load every root field from the DB and open one
# "citation_<rootdesc>_<rootid>.txt" output file per root.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because code outside this section may still refer to it.
dir = "f:\\data\\MAG-2016kdd\\MicrosoftAcademicGraph\\output\\"
time_start = time.time()
# Filename template; filled with (rootdesc, rootid) in the loop below.
dest_file = dir + "citation_%s_%s.txt"

# Query templates. The '%s' placeholders are left unfilled in this section.
sql_str_roots = "select distinct rootid, rootdesc from fieldswithroot"
sql_str_subfield_of_one_root = "select id from fieldswithroot where rootid='%s' order by id ASC "
sql_str_papers_of_all_subfields = "select paperid from paperkeywords where fieldid='%s' order by paperid ASC "
# NOTE(review): this text is identical to sql_str_citation_of_all_papers even
# though the name says "pubyear" -- looks like a copy/paste; confirm the
# intended query before relying on it.
sql_str_pubyear_of_all_papers = "select paperid,refid from paperreferences where refid='%s' order by id ASC"
sql_str_citation_of_all_papers = "select paperid,refid from paperreferences where refid='%s' order by id ASC"

roots = db.get_query_results(sql_str_roots)
time_end = time.time()
logger.info('Done: Read Roots from DB! Time cost:%d s', time_end - time_start)

rootcount = 0  # number of roots visited so far
dict_fields_info = {}
for root in roots:
    # Rows are indexed in the column order of sql_str_roots:
    # root[0] is rootid, root[1] is rootdesc.
    dest_file_root = dest_file % (root[1], root[0])
    rootcount += 1
    # errors='ignore' silently drops characters that cannot be encoded.
    # NOTE(review): no close() is visible in this section; make sure the
    # handle is closed (or switch to a `with` block) where the loop body ends.
    f_dest = open(dest_file_root, encoding='UTF-8', mode='w', errors='ignore')
# Per-field paper export setup: load every distinct field id from
# fieldswithroot and derive one "paper_<field>.txt" output path per field.
# NOTE(review): `dir` is not assigned in this section (the assignment below is
# commented out); it must already be bound to the output directory string.
#dir="D:\\data\\MAG\\output\\"
time_start = time.time()
# Filename template; filled with the field row in the loop below.
dest_file = dir + "paper_%s.txt"

sql_str_subfield = "select distinct id from fieldswithroot order by id ASC "
# The '%s' placeholder is left unfilled in this section.
sql_str_papers_of_all_subfields = "select paperid,pubyear from paperkeywordswithyear where fieldid='%s' order by pubyear ASC "

fields = db.get_query_results(sql_str_subfield)
time_end = time.time()
# The message previously said "Roots", but this query reads field ids.
logger.info('Done: Read Fields from DB! Time cost:%d s', time_end - time_start)

num_subfields_of_all_roots = 0  # number of fields visited so far
for field in fields:
    num_subfields_of_all_roots += 1
    # `(field)` is not a tuple, so the row itself is the formatting argument.
    # NOTE(review): presumably each row is a 1-tuple like (id,), which makes
    # this yield "paper_<id>.txt" -- verify against db.get_query_results.
    dest_file_field = dest_file % (field)
#logger.addHandler(fh)
# Setup for populating fieldswithroot: read the root (parentlevel 'L0') field
# ids and every field's (id, description), then index descriptions by field id.
# NOTE(review): `dir` shadows the builtin of the same name; the name is kept
# because code outside this section may still refer to it.
#dir="f:\\data\\MAG-2016kdd\\MicrosoftAcademicGraph\\"
dir = "/home/zico/mag/"
time_start = time.time()

sql_str_roots = "select distinct parentid from fieldofstudyhierarchy where parentlevel='L0'"
sql_str_all_fields = "select id, description from fieldsofstudies "
# The two templates below are only defined here; nothing in this section
# fills their '%s' placeholders or executes them.
sql_get_all_subfild = "select getChild('%s')"
sql_str = 'insert into fieldswithroot(id,thisdesc,rootid,rootdesc) values(%s,%s,%s,%s)'

roots = db.get_query_results(sql_str_roots)
time_end = time.time()
logger.info('Done: Read Roots from DB! Time cost:%d s', time_end - time_start)

all_fileds = db.get_query_results(sql_str_all_fields)
time_end = time.time()
# time_start is not reset, so this figure is the elapsed time since the start
# of the section (it includes the roots query above).
logger.info('Done: Read All Fields from DB! Time cost:%d s', time_end - time_start)

linecount = 0
# Maps field id -> description, in the column order of sql_str_all_fields.
dict_fields_info = {}
for field in all_fileds:
    dict_fields_info[field[0]] = field[1]