Пример #1
0
# Script fragment: reads the distinct root fields from the database and opens
# one "citation_<...>_<...>.txt" output file per root.
# `db`, `logger` and `time` are defined/imported outside this fragment.

# Output directory (Windows path). NOTE(review): the name `dir` shadows the
# Python builtin `dir()` for the rest of the module.
dir="f:\\data\\MAG-2016kdd\\MicrosoftAcademicGraph\\output\\"
# Start of the wall-clock measurement reported by the log call below.
time_start=time.time()

# File-name template with two %s placeholders; filled in the loop below with
# (root[1], root[0]).
dest_file= dir + "citation_%s_%s.txt"

# SQL text templates. The '%s' placeholders are plain string-format slots
# inside quotes; how they get substituted is not visible in this fragment.
sql_str_roots="select distinct rootid, rootdesc from fieldswithroot"
sql_str_subfield_of_one_root="select id from fieldswithroot where rootid='%s' order by id ASC "
sql_str_papers_of_all_subfields="select paperid from paperkeywords where fieldid='%s' order by paperid ASC "
# NOTE(review): the next two templates are character-for-character identical,
# and the "pubyear" one does not select any pubyear column -- looks like a
# copy/paste leftover; confirm which query was intended.
sql_str_pubyear_of_all_papers="select paperid,refid from paperreferences where refid='%s' order by id ASC"
sql_str_citation_of_all_papers="select paperid,refid from paperreferences where refid='%s' order by id ASC"

#select b.id afrom paperkeywords a, papers b wehre i a.paperid=b.papers.id


# The empty-list initialisation is immediately overwritten by the query result.
roots=[]
roots= db.get_query_results(sql_str_roots)
time_end=time.time()
# Logs the seconds elapsed since time_start (formatted with %d, so truncated
# to an integer).
logger.info('Done: Read Roots from DB! Time cost:%d s', time_end - time_start)


# Number of roots processed so far; incremented once per loop iteration.
rootcount = 0
# Not used anywhere in the visible part of this fragment.
dict_fields_info= {}

#for root in roots:
#    print(dict_fields_info["%s" % root])
#exit()
for root in roots:
    # Presumably root[0] is rootid and root[1] is rootdesc, following the
    # column order of sql_str_roots -- TODO confirm what db.get_query_results
    # returns per row.
    dest_file_root = dest_file % (root[1],root[0])
    rootcount+=1
    # Opened for writing as UTF-8; characters that cannot be encoded are
    # dropped (errors='ignore').
    # NOTE(review): no close() / `with` block is visible in this fragment.
    f_dest = open(dest_file_root, encoding='UTF-8', mode='w', errors='ignore')
Пример #2
0
# Script fragment: reads every distinct field id from `fieldswithroot` and
# derives one "paper_<field>.txt" file name per field.
# `db`, `logger` and `time` are defined/imported outside this fragment.

# NOTE(review): the only assignment to `dir` here is commented out. Unless
# `dir` is assigned earlier (not visible), `dir` below is the builtin function
# and `dir + "..."` would raise TypeError -- confirm.
#dir="D:\\data\\MAG\\output\\"
# Start of the wall-clock measurement reported by the log call below.
time_start = time.time()

# File-name template with a single %s placeholder, filled per field in the loop.
dest_file = dir + "paper_%s.txt"
#dest_citation_file= dir + "citing_%s_%s.txt"

# SQL text templates; the second has one '%s' slot for a field id. How it is
# substituted is not visible in this fragment.
sql_str_subfield = "select distinct id from fieldswithroot order by id ASC "
sql_str_papers_of_all_subfields = "select paperid,pubyear from paperkeywordswithyear where fieldid='%s' order by pubyear ASC "
#sql_str_pubyear_of_all_papers="select id,pubyear from papers where id='%s' order by id ASC"
#sql_str_pubyear_of_all_papers_in="select id,pubyear from papers where id in (%s)  order by id ASC"
#sql_str_citations_of_all_papers="select paperid,refid from  paperreferences where refid='%s' order by paperid ASC"

#select b.id afrom paperkeywords a, papers b wehre i a.paperid=b.papers.id

# The empty-list initialisation is immediately overwritten by the query result.
fields = []
fields = db.get_query_results(sql_str_subfield)
time_end = time.time()
# NOTE(review): the message says "Read Roots" although the query executed
# above selects distinct field ids (sql_str_subfield).
logger.info('Done: Read Roots from DB! Time cost:%d s', time_end - time_start)

# Counts the fields visited by the loop below (incremented once per iteration).
num_subfields_of_all_roots = 0
#dict_fields_info= {}

#for root in roots:
#    print(dict_fields_info["%s" % root])
#exit()
for field in fields:
    #   dest_citation_file_root = dest_citation_file % (root[1],root[0])
    num_subfields_of_all_roots += 1
    #    if num_subfields_of_one_root==1:
    #        continue
    # `(field)` is just a parenthesised expression, not a 1-tuple. Presumably
    # each row is itself a 1-element tuple, which %-formatting would unpack
    # into the single placeholder -- TODO confirm the row type returned by
    # db.get_query_results.
    dest_file_field = dest_file % (field)
Пример #3
0
# Script fragment: loads the root field ids and all (id, description) field
# rows from the database, then builds a lookup dict from the field rows.
# `db`, `logger` and `time` are defined/imported outside this fragment.
#logger.addHandler(fh)

#dir="f:\\data\\MAG-2016kdd\\MicrosoftAcademicGraph\\"
# Base directory. It is not used in the visible part of this fragment.
# NOTE(review): the name `dir` shadows the Python builtin `dir()`.
dir = "/home/zico/mag/"
# Single start timestamp shared by BOTH log calls below.
time_start = time.time()

#dest_file= dir + "output_SubOfFirstField.txt"
#f_dest = open(dest_file, encoding='UTF-8', mode='w', errors='ignore')

# SQL text. Only sql_str_roots and sql_str_all_fields are executed in the
# visible part of this fragment.
sql_str_roots = "select distinct parentid from fieldofstudyhierarchy where parentlevel='L0'"
sql_str_all_fields = "select id, description from fieldsofstudies "
# Presumably getChild is a function defined on the database side -- TODO confirm.
sql_get_all_subfild = "select getChild('%s')"
# Insert template with four %s placeholders (id, thisdesc, rootid, rootdesc).
sql_str = 'insert into fieldswithroot(id,thisdesc,rootid,rootdesc) values(%s,%s,%s,%s)'

# The empty-list initialisation is immediately overwritten by the query result.
roots = []
roots = db.get_query_results(sql_str_roots)
time_end = time.time()
logger.info('Done: Read Roots from DB! Time cost:%d s', time_end - time_start)
#logger.info(roots[0])

# The empty-list initialisation is immediately overwritten by the query result.
all_fileds = []
all_fileds = db.get_query_results(sql_str_all_fields)
time_end = time.time()
# time_start is not reset, so this reports the time since the start of the
# script section (roots query included), not the time of this query alone.
logger.info('Done: Read All Fields from DB! Time cost:%d s',
            time_end - time_start)
#logger.info(all_fileds[0])
# Not modified anywhere in the visible part of this fragment.
linecount = 0
# Maps field[0] -> field[1] for every row of all_fileds; following the column
# order of sql_str_all_fields this is presumably id -> description (TODO
# confirm the row layout returned by db.get_query_results).
dict_fields_info = {}

for field in all_fileds:
    # Later rows with the same field[0] overwrite earlier ones.
    dict_fields_info.update({field[0]: field[1]})