Exemplo n.º 1
0
def __dumpId2IdList(filename, id2IdList):
    """Write a one-to-many id mapping to ``filename`` as ``key,value`` rows.

    filename  -- target file; it is passed to ``fif.resetFile`` first
                 (presumably truncating it -- confirm against ``fif``).
    id2IdList -- mapping of an id to an iterable of related ids.

    One ``'<id>,<related id>'`` row is emitted per related id. Keys whose
    value is falsy (``None``, empty list, ...) produce no rows.
    """
    fif.resetFile(filename)
    print("dump each record line ...")
    for key, related in id2IdList.items():
        # A falsy list (e.g. author -> org may be empty) contributes nothing.
        for other in related or ():
            fif.addToFile(filename, '%s,%s' % (key, other), isline=1)
Exemplo n.º 2
0
def overwrite():
    """Regenerate ``FilePid2Term`` and report paper hashes with no known id.

    For every record yielded by ``psr.paperterms()`` the first three items
    are taken as (hash id, title terms, abstract terms). When the hash id is
    a key of the mapping returned by ``readPaperHash2Id()``, one row
    ``'<pid>!<title terms>!<abstract terms>'`` is added to ``FilePid2Term``,
    each term list joined with ``';'``. Otherwise the hash id is recorded.

    The unmatched hash ids are written to ``BadHash2Pid.txt`` in the current
    working directory: a header line followed by one hash id per line. The
    file is written (header only) even when nothing is unmatched.
    """
    hash2pid = readPaperHash2Id()
    # Report pieces are collected in a list and joined once at the end.
    # The header is its own entry so that the first missing hash is no
    # longer glued onto the header line.
    report = ['!NO HASH IN HASH2PID!']
    fif.resetFile(FilePid2Term)
    for pt in psr.paperterms():
        hashid, termTtl, termAbs = pt[0], pt[1], pt[2]
        if hashid not in hash2pid:
            report.append("%s" % hashid)
            continue
        pid = hash2pid[hashid]
        text = '%s!%s!%s' % (pid, ';'.join(termTtl), ';'.join(termAbs))
        fif.addToFile(FilePid2Term, text, isline=1)
    with open('BadHash2Pid.txt', 'w') as f:
        f.write('\n'.join(report) + '\n')
Exemplo n.º 3
0
def makeSqlTblPaper():
    """Dump the paper table to ``FileSqlPaper`` as comma-separated rows.

    Each record from ``readin.papermeta()`` contributes one row built from
    items 1..7 of the record. Items 1 and 2 (id, year) are written as-is;
    items 3..7 (publisher, publisher id, title, abstract, authors) are
    passed through ``sqlization`` first. All seven values must already be
    strings, since they are combined with ``','.join``.
    """
    print("\nmake === PAPER TABLE ===")

    # row layout: paperid, year, publisher, publisher id, title, abstract, authors
    fif.resetFile(FileSqlPaper)
    print("dump each record line ...")
    for meta in readin.papermeta():
        paper_id, year = meta[1], meta[2]
        # Only the free-text columns go through sqlization.
        escaped = [sqlization(meta[pos]) for pos in (3, 4, 5, 6, 7)]
        row = ','.join([paper_id, year] + escaped)
        fif.addToFile(FileSqlPaper, row, isline=1)
Exemplo n.º 4
0
def makeSqlTblOrg():
    """Dump the organization table to ``FileSqlOrg`` as comma-separated rows.

    Each record from ``readin.getOrgId2Data()`` must unpack into six items:
    id, department, organization, city, country, raw geo text. The row
    written is ``id,department,organization,city,country`` with department
    and organization passed through ``sqlization``; the raw geo text is not
    written.

    A record that cannot be unpacked, formatted or written is reported on
    stdout and skipped, so one bad record does not abort the dump.
    """
    fif.resetFile(FileSqlOrg)
    # input record: id, org-department, organization, city, country, georawtext
    for data in readin.getOrgId2Data():
        try:
            # Unpacking all six fields validates the record shape even
            # though the geo text is not part of the output row.
            iid, depart, org, city, country, _geo = data
            line = '%s,%s,%s,%s,%s' % (iid, sqlization(depart),
                                       sqlization(org), city, country)
            fif.addToFile(FileSqlOrg, line, isline=1)
        except Exception:
            # Report the offending record itself: ``line`` may be unbound
            # or still hold the previous record's text at this point.
            print("BADLINE when making sql for organization: %r" % (data,))
    print("- DONE")
Exemplo n.º 5
0
def dump2file():
    """Write the author, department and publisher tables to their files.

    For each table, in that order, the target file name is announced on
    stdout and every text yielded by the table's ``nextRecText()`` is added
    to the file through ``fif.addToFile``. The files are not reset here.
    """
    def _dump(target, table):
        # Announce the file, then add each record text to it.
        print("dump file %s" % (target,))
        for text in table.nextRecText():
            fif.addToFile(target, text, isline=1)

    _dump(FileTblAuthor, AuthorName2Id)
    _dump(FileTblDepart, Depart_Name2Id)
    _dump(FileTblPub, Publisher_Name2Id)
Exemplo n.º 6
0
def _dumpId2Name(id2name, filename):
    """Write an id -> name mapping to ``filename`` as ``id,name`` rows.

    id2name  -- mapping whose values are passed through ``sqlization``
                before being written.
    filename -- target file; it is passed to ``fif.resetFile`` first
                (presumably truncating it -- confirm against ``fif``).
    """
    fif.resetFile(filename)
    print("dump each record line ...")
    for ident, label in id2name.items():
        escaped = sqlization(label)
        fif.addToFile(filename, '%s,%s' % (ident, escaped), isline=1)