def __dumpId2IdList(filename, id2IdList):
    """Dump an id -> list-of-ids mapping as one 'id,id' line per pair.

    filename  -- target file; handed to fif.resetFile() first and then to
                 fif.addToFile() for every emitted line.
    id2IdList -- mapping whose values are iterables of ids.  Keys whose
                 value is falsy (None or empty) produce no output.
    """
    fif.resetFile(filename)
    print("dump each record line ...")
    for src_id, targets in id2IdList.items():
        # the target list could be empty for author->org; such keys
        # simply contribute no lines
        for dst_id in targets or ():
            fif.addToFile(filename, '%s,%s' % (src_id, dst_id), isline=1)
def overwrite():
    """Rebuild FilePid2Term from the parsed paper terms.

    Each record from psr.paperterms() carries a hash (field 0), title
    terms (field 1) and abstract terms (field 2).  When the hash is a key
    of the mapping returned by readPaperHash2Id(), a line of the form
    'pid!title;terms!abstract;terms' is added to FilePid2Term.  Hashes
    without a paper id are collected and written to 'BadHash2Pid.txt',
    one per line, after all records have been processed.
    """
    hash2pid = readPaperHash2Id()
    # NOTE(review): the header has no trailing newline, so the first
    # unmatched hash lands on the same line as the header -- confirm
    # whether that is intended before changing it.
    unmatched = ['!NO HASH IN HASH2PID!']
    fif.resetFile(FilePid2Term)
    for rec in psr.paperterms():
        hashid, title_terms, abstract_terms = rec[0], rec[1], rec[2]
        if hashid not in hash2pid:
            unmatched.append("%s\n" % hashid)
            continue
        row = '%s!%s!%s' % (
            hash2pid[hashid],
            ';'.join(title_terms),
            ';'.join(abstract_terms),
        )
        fif.addToFile(FilePid2Term, row, isline=1)
    with open('BadHash2Pid.txt', 'w') as report:
        report.write(''.join(unmatched))
def makeSqlTblPaper():
    """Write one comma-separated line per paper record to FileSqlPaper.

    Records come from readin.papermeta().  Fields 1 and 2 are emitted as
    they are (they must already be strings, because the columns are
    combined with str.join); fields 3 through 7 are passed through
    sqlization() first.
    """
    print("\nmake === PAPER TABLE ===")
    # columns: paperid, year, publisher, publisher id, title, abstract, authors
    fif.resetFile(FileSqlPaper)
    print("dump each record line ...")
    for meta in readin.papermeta():
        paper_id, year = meta[1], meta[2]
        publisher, publisher_id, title, abstract, authors = [
            sqlization(meta[idx]) for idx in range(3, 8)
        ]
        # save them to the csv
        row = ','.join(
            [paper_id, year, publisher, publisher_id, title, abstract, authors]
        )
        fif.addToFile(FileSqlPaper, row, isline=1)
def makeSqlTblOrg():
    """Write one comma-separated line per organization record to FileSqlOrg.

    Every item yielded by readin.getOrgId2Data() must unpack into six
    fields: id, department, organization, city, country and raw geo text.
    Only the first five are written; department and organization are
    passed through sqlization(), the others are emitted as they are.

    Processing is best effort: a record that cannot be unpacked,
    formatted or written is reported on stdout and skipped, and the
    remaining records are still processed.
    """
    fif.resetFile(FileSqlOrg)
    # columns: id, org-department, organization, city, country
    # (the raw geo text is part of the input record but is not emitted)
    for data in readin.getOrgId2Data():
        try:
            iid, depart, org, city, country, _geo = data
            line = '%s,%s,%s,%s,%s' % (
                iid, sqlization(depart), sqlization(org), city, country)
            fif.addToFile(FileSqlOrg, line, isline=1)
        except Exception:
            # Report the input record itself: `line` may not be bound yet
            # (failure while unpacking the first record) or may still hold
            # the previous record's text.  Catching Exception rather than
            # using a bare except lets KeyboardInterrupt/SystemExit
            # propagate.
            print("BADLINE when making sql for organization %r" % (data,))
    print("- DONE")
def dump2file():
    """Dump the author, department and publisher tables to their files.

    For each table, every text record produced by its nextRecText() is
    handed to fif.addToFile() for the matching target file, in the order
    author, department, publisher.  This function does not call
    fif.resetFile() on the targets first.
    """
    # (target file, table) pairs, processed in this order
    jobs = (
        (FileTblAuthor, AuthorName2Id),
        (FileTblDepart, Depart_Name2Id),
        (FileTblPub, Publisher_Name2Id),
    )
    for path, table in jobs:
        print("dump file %s" % (path,))
        for text in table.nextRecText():
            fif.addToFile(path, text, isline=1)
def _dumpId2Name(id2name, filename):
    """Dump an id -> name mapping as one 'id,name' line per entry.

    id2name  -- mapping from id to name; each name is passed through
                sqlization() before being written.
    filename -- target file; handed to fif.resetFile() first and then to
                fif.addToFile() for every emitted line.
    """
    fif.resetFile(filename)
    print("dump each record line ...")
    # lazy generator: each row is formatted right before it is written
    rows = (
        '%s,%s' % (key, sqlization(label))
        for key, label in id2name.items()
    )
    for row in rows:
        fif.addToFile(filename, row, isline=1)