def load_data(aminer, linkedin):
    """Load the pickled AMiner/LinkedIn graphs and look up AMiner profiles.

    Both arguments are ignored: ``aminer`` and ``linkedin`` are immediately
    rebound to the objects unpickled from two fixed paths on disk. The node
    counts of both graphs are printed, then every AMiner node id is converted
    to ``int`` and its profile is requested from MySQL. Lookup failures are
    printed and do not stop the loop.

    Nothing is returned; the collected ids and the fetched profiles are
    currently discarded when the function ends.

    Raises:
        IOError: if either pickle file cannot be opened.
        ValueError: if a node id cannot be converted with ``int``.
    """
    import pickle

    mysql = Mysql()
    mongo = Mongo()  # constructed as in the original, though unused below

    # NOTE(review): the files are opened in the default (text) mode, exactly
    # as before, so pickles written that way keep loading. Only load pickles
    # from trusted sources -- unpickling can execute arbitrary code.
    with open("D:\\Users\\chenwei\\script\\aminer_two") as aminer_file:
        aminer = pickle.load(aminer_file)
    with open("D:\\Users\\chenwei\\script\\linkedin_two_filter") as linkedin_file:
        linkedin = pickle.load(linkedin_file)

    print(aminer.number_of_nodes())
    print(linkedin.number_of_nodes())

    ids = []
    for index, node in enumerate(aminer.nodes()):
        verbose.index(index)
        ids.append(int(node))
        profile = ""
        try:
            profile = mysql.get_person_aminer_profile(node)
        except Exception as lookup_error:
            # Best effort: report the failure and the offending node, then
            # carry on with the next node.
            print(lookup_error)
            try:
                print(node)
            except Exception as print_error:
                print(print_error)
def process_aminer(docs):
    """Append one id/type/data entry to ``docs`` for every person row.

    For each row returned by ``Mysql.fetch_person()`` the function appends
    ``row[0]`` to ``docs['id']``, the constant ``1`` to ``docs['type']`` and a
    text blob to ``docs['data']``. The blob is built from ``row[1]``, selected
    columns of the matching ``contact_info`` rows (skipped when ``row[2]`` is
    ``-1``) and column 4 of the matching ``na_person_organization`` rows.
    Commas in the blob are replaced by spaces before it is passed through
    ``UnicodeDammit``.

    Args:
        docs: mapping with ``'id'``, ``'type'`` and ``'data'`` keys whose
            values support ``append``; it is mutated in place.

    Returns:
        None.
    """
    # Positions of the contact_info columns that are copied into the text.
    contact_columns = [1, 4, 5, 7, 8, 14, 15, 17, 18, 20, 21, 22, 24]

    mysql = Mysql()
    people = mysql.fetch_person()
    for index, row in enumerate(people):
        # Progress indicator every 10000 rows.
        if index % 10000 == 0:
            print(index)

        docs['id'].append(row[0])
        docs['type'].append(1)
        data = row[1] + '\n'

        if row[2] != -1:
            mysql.cur.execute("SELECT * FROM contact_info c WHERE c.id = '" + str(row[2]) + "'")
            for contact in mysql.cur.fetchall():
                for column in contact_columns:
                    value = contact[column]
                    if value is None:
                        continue
                    # Concatenate piece by piece (rather than joining at the
                    # end) so a value that cannot be converted or appended is
                    # reported and skipped without losing the rest.
                    try:
                        data += str(value) + '\n'
                    except Exception as e:
                        print(e)

        mysql.cur.execute("SELECT * FROM na_person_organization o WHERE o.aid = " + str(row[0]))
        for organization in mysql.cur.fetchall():
            # NOTE(review): organization values are appended without a
            # separator, as in the original -- confirm this is intended.
            data += str(organization[4])

        docs['data'].append(UnicodeDammit(data.replace(",", " ")).markup)
'''
Created on Dec 20, 2012

@author: Yutao

Dump the missing-data structures returned by Mysql.get_missing_data() to the
pickle files "missing_data1" and "person_missing_data1" in the current
working directory.
'''
import pickle

from src.database.mysql import Mysql

if __name__ == "__main__":
    mysql = Mysql()
    missing_data, person_missing_data = mysql.get_missing_data()

    # Context managers make sure both dumps are flushed and closed even if
    # pickling fails part-way through.
    # NOTE(review): mode 'w' (text) is kept so the files stay readable by
    # whatever already opens them; consider 'wb' if every reader uses 'rb'.
    with open("missing_data1", 'w') as m_dump:
        pickle.dump(missing_data, m_dump)
    with open("person_missing_data1", 'w') as p_dump:
        pickle.dump(person_missing_data, p_dump)