Exemplo n.º 1
0
def load_data(aminer, linkedin):
    """Load the two pickled graphs and look up a profile for each AMiner node.

    NOTE: both arguments are ignored. They are rebound straight away to the
    graphs unpickled from the hard-coded paths below; the parameters are kept
    only so that existing callers keep working.

    The node count of each graph is printed. Then, for every node of the
    AMiner graph, progress is reported through ``verbose.index``, the node id
    is converted with ``int()`` and recorded, and the profile is requested
    from ``Mysql.get_person_aminer_profile``. A failed lookup is printed and
    skipped (best effort), so one bad node does not stop the loop.

    NOTE(review): nothing is returned and the collected values are not used
    after the loop, so the function looks unfinished in this file -- confirm
    against the original project before relying on it.
    """
    mysql = Mysql()
    mongo = Mongo()  # not used below; construction kept for its side effects
    import pickle

    # SECURITY: pickle.load can execute arbitrary code; these paths must only
    # ever point at trusted, locally produced files.
    # The files are opened in the default (text) mode exactly as before.
    # NOTE(review): pickles written with a binary protocol need "rb" --
    # confirm how these files were produced before changing the mode.
    with open("D:\\Users\\chenwei\\script\\aminer_two") as aminer_file:
        aminer = pickle.load(aminer_file)
    with open("D:\\Users\\chenwei\\script\\linkedin_two_filter") as linkedin_file:
        linkedin = pickle.load(linkedin_file)
    print(aminer.number_of_nodes())
    print(linkedin.number_of_nodes())

    ids = []
    profiles = []    # not filled in by the code visible here
    node_types = []  # not filled in by the code visible here

    for position, node in enumerate(aminer.nodes()):
        verbose.index(position)
        ids.append(int(node))
        profile = ""
        try:
            profile = mysql.get_person_aminer_profile(node)
        except Exception as e:
            # Best effort: report the error and the offending node, then go on.
            print(e)
            try:
                print(node)
            except Exception as e:
                # Printing the node itself can fail (e.g. encoding problems).
                print(e)
def process_aminer(docs):
    """Append one text document per AMiner person to ``docs``.

    ``docs`` must map the keys ``'id'``, ``'type'`` and ``'data'`` to objects
    with an ``append`` method. For every row returned by
    ``Mysql.fetch_person`` this appends ``row[0]`` to ``docs['id']``, the
    constant ``1`` to ``docs['type']`` and the assembled text to
    ``docs['data']``.

    The text is built from:

    * ``row[1]`` followed by a newline;
    * when ``row[2]`` is not ``-1``: every non-null value in a fixed set of
      columns of the matching ``contact_info`` rows, one value per line;
    * column 4 of every ``na_person_organization`` row whose ``aid`` equals
      ``row[0]``, concatenated without a separator.

    Commas in the text are replaced by spaces, the result is wrapped in
    ``UnicodeDammit`` and its ``markup`` attribute is what gets stored.

    A contact value that cannot be converted or concatenated is reported with
    ``print`` and skipped. The previous bare ``except:`` re-ran the very same
    statement, which could only fail in the same way, and it also swallowed
    ``KeyboardInterrupt``/``SystemExit``; a single ``except Exception`` keeps
    the best-effort behaviour without those problems.
    """
    # Column positions read from each contact_info row; what the columns hold
    # is not visible from this file.
    contact_columns = (1, 4, 5, 7, 8, 14, 15, 17, 18, 20, 21, 22, 24)

    mysql = Mysql()
    people = mysql.fetch_person()
    for index, row in enumerate(people):
        # Progress marker every 10000 rows.
        if index % 10000 == 0:
            print(index)
        docs['id'].append(row[0])
        docs['type'].append(1)
        data = ""
        data += row[1] + '\n'
        if row[2] != -1:
            # NOTE(review): the SQL is built by string concatenation from
            # values read out of the database; if the cursor supports
            # parameter binding, prefer it -- confirm the driver first.
            mysql.cur.execute("SELECT * FROM contact_info c WHERE c.id = '"+str(row[2])+"'")
            for contact in mysql.cur.fetchall():
                for column in contact_columns:
                    if contact[column] is not None:
                        # Keep the concatenation inside the try: both str()
                        # and the += itself may raise on non-ASCII text.
                        try:
                            data += str(contact[column]) + '\n'
                        except Exception as e:
                            print(e)
        mysql.cur.execute("SELECT * FROM na_person_organization o WHERE o.aid = "+str(row[0]))
        for organization in mysql.cur.fetchall():
            data += str(organization[4])
        docs['data'].append(UnicodeDammit(data.replace(",", " ")).markup)
'''
Created on Dec 20, 2012

@author: Yutao
'''
from src.database.mysql import Mysql

if __name__ == "__main__":
    # Script entry point: fetch the two results of Mysql.get_missing_data()
    # and pickle each of them to its own file in the current directory.
    mysql = Mysql()
    missing_data, person_missing_data = mysql.get_missing_data()
    import pickle

    # "with" guarantees both files are flushed and closed even when a dump
    # fails; before, the handles were never closed explicitly.
    # The mode stays 'w' so the bytes written are the same as before.
    # NOTE(review): binary pickle protocols (and Python 3) need 'wb' --
    # confirm how these files are read back before changing it.
    with open("missing_data1", 'w') as m_dump:
        with open("person_missing_data1", 'w') as p_dump:
            pickle.dump(missing_data, m_dump)
            pickle.dump(person_missing_data, p_dump)