def GetData(queue): lastid ='0' page_size=1000; icount = 0 while True: print 'start handling lastid....'+str(lastid) lastid_new = lastid conn=MySQLdb.connect(host='10.10.107.4',user='******',passwd='000000',port=3306,db="zi") if queue.full(): time.sleep(3) continue strsql = 'select * from T_PROD where prod_type = 1 and prod_id > %s limit 0, %s' conn.set_character_set('utf8') cur = conn.cursor() try: cur.execute(strsql,(lastid,page_size)) data =cur.fetchall() if data: lastid_new = data[-1][0] icount = icount+len(data) queue.put(data) print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid)+",rows:"+str(icount) except Exception, e: print e libes2.writeLog('T_PROD error lastid:' + str(lastid)+',ERROR:'+str(e)) finally:
def write_to_csv(lines):
    """Append a batch of rows to ``./all_patents.csv``, quoting every field.

    Args:
        lines: iterable of row sequences, handed to ``csv.writer.writerows``.

    Errors raised while writing are printed and reported through
    ``libes2.writeLog``; they are not re-raised. Errors raised while opening
    the file still propagate to the caller, as before.
    """
    # The ``with`` block guarantees the handle is flushed and closed after
    # every batch instead of relying on garbage collection.
    # "ab" is kept as-is: the surrounding code is Python 2, where the csv
    # module expects a binary-mode file.
    with open('./all_patents.csv', "ab") as out:
        writer = csv.writer(out, quoting=csv.QUOTE_ALL)
        try:
            writer.writerows(lines)
        except Exception as e:
            print(e)
            libes2.writeLog('write to file db error:,ERROR:' + str(e))
def GetData(queue): lastid =0 page_size=1000; while True: print 'start handling lastid....'+str(lastid) lastid_new = lastid conn=MySQLdb.connect(host='10.10.100.31',user='******',passwd='xmmiou8015',port=3306,db="trademark") if queue.full(): time.sleep(3) continue strsql = 'select int_id,str_reg_id from t_image where int_id > %s limit 0, %s' conn.set_character_set('utf8') cur = conn.cursor() try: cur.execute(strsql,(lastid,page_size)) data =cur.fetchall() if data: lastid_new = data[-1][0] queue.put(data) print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid) except Exception, e: print e libes2.writeLog('t_image error lastid:' + str(lastid)+',ERROR:'+str(e)) finally:
def write_to_csv(lines):
    """Append a batch of rows to ``./org_alixls_db.csv``, quoting every field.

    Args:
        lines: iterable of row sequences, handed to ``csv.writer.writerows``.

    Errors raised while writing are printed and reported through
    ``libes2.writeLog``; they are not re-raised. Errors raised while opening
    the file still propagate to the caller, as before.
    """
    # The ``with`` block guarantees the handle is flushed and closed after
    # every batch instead of relying on garbage collection.
    # "ab" is kept as-is: the surrounding code is Python 2, where the csv
    # module expects a binary-mode file.
    with open('./org_alixls_db.csv', "ab") as out:
        writer = csv.writer(out, quoting=csv.QUOTE_ALL)
        try:
            writer.writerows(lines)
        except Exception as e:
            print(e)
            libes2.writeLog('insert db error:,ERROR:' + str(e))
def write_to_csv(fpath, line):
    """Append a single row to the CSV file at ``fpath``, quoting every field.

    Args:
        fpath: path of the CSV file to append to (created if missing, per
            the "a" open mode).
        line: one row sequence, handed to ``csv.writer.writerow``.

    Errors raised while writing are printed and reported through
    ``libes2.writeLog``; they are not re-raised. Errors raised while opening
    the file still propagate to the caller, as before.
    """
    # The ``with`` block closes the handle after each row; the original left
    # one open handle behind per call.
    # "ab" is kept as-is: the surrounding code is Python 2, where the csv
    # module expects a binary-mode file.
    with open(fpath, "ab") as out:
        writer = csv.writer(out, quoting=csv.QUOTE_ALL)
        try:
            writer.writerow(line)
        except Exception as e:
            print(e)
            libes2.writeLog('insert db error:,ERROR:' + str(e))
def outputQ(queue, lock):
    """Output worker: drain row batches from ``queue`` and append them to CSV.

    Each batch taken from the queue is a sequence of rows. Every row is
    copied to a list, selected columns are cleaned with ``get_str``, two
    extra columns are appended, and the whole batch is passed to
    ``write_to_csv``.

    The worker polls every 3 seconds while the queue is empty and returns
    once it has seen more than 30 consecutive empty polls.

    Args:
        queue: queue object providing ``empty()`` and ``get()``.
        lock: not used by this function; kept for interface compatibility.
    """
    # The idle counter must live outside the loop. The original reset it to 0
    # at the top of every iteration, so the "> 30" exit was never reached and
    # the worker could not terminate.
    idle_polls = 0
    while True:
        if queue.empty():
            if idle_polls > 30:
                # Log before leaving; the original had this after ``break``.
                libes2.writeLog('all finished')
                break
            time.sleep(3)
            idle_polls += 1
            continue

        # Data arrived: only *consecutive* empty polls count towards exit.
        idle_polls = 0

        lastid = 0
        rlist = queue.get()
        lines = []
        for a in rlist:
            lastid = a[0]
            line = list(a)

            line[21] = get_str(line[21])
            line.append(line[21])  # region_temp_str
            line[5] = get_str(line[5])
            line[17] = get_str(line[17])
            # Column 16 becomes md5(column 17) followed by column 17's length.
            line[16] = str(hashlib.md5(line[17]).hexdigest()) + str(len(line[17]))

            line[19] = get_str(line[19])
            # ``> 0`` (not ``>= 0``): a leading '|' is deliberately not split.
            if line[19].find('|') > 0:
                line.append(line[19].split('|')[0])  # temp postcode
                line[19] = line[19].split('|')[1]
            else:
                line.append('')

            # NOTE(review): the source was whitespace-collapsed; this reset is
            # assumed to apply to every row (not only the else branch) - confirm.
            line[21] = ''
            line[48] = get_str(line[48])  # 48.remark
            line[49] = get_str(line[49])
            lines.append(line)

        write_to_csv(lines)
        print('write to csv finished ,lastid:' + str(lastid))
def outputQ(queue, lock):
    """Output worker: enrich queued (id, brand id) rows from Mongo and write CSV.

    Each batch taken from the queue is a sequence of rows where ``row[0]`` is
    used as the running id and ``row[1]`` as the ``_id`` looked up in the
    ``brand`` collection of the ``zi`` database. For every document found, a
    12-column output row is built; the batch is then passed to
    ``write_to_csv``.

    The worker polls every 3 seconds while the queue is empty and returns
    once it has seen more than 30 consecutive empty polls.

    Args:
        queue: queue object providing ``empty()`` and ``get()``.
        lock: not used by this function; kept for interface compatibility.
    """
    db = client.zi
    brand = db.brand
    # Only these fields are read below, so only these are projected.
    wanted_fields = {
        'prod_name': 1,
        'brand_cat_ids': 1,
        'brand_group_list': 1,
        'is_invalid': 1,
    }

    # The idle counter must live outside the loop. The original reset it to 0
    # at the top of every iteration, so the "> 30" exit was never reached and
    # the worker could not terminate.
    idle_polls = 0
    while True:
        if queue.empty():
            if idle_polls > 30:
                # Log before leaving; the original had this after ``break``.
                libes2.writeLog('all finished')
                break
            time.sleep(3)
            idle_polls += 1
            continue

        # Data arrived: only *consecutive* empty polls count towards exit.
        idle_polls = 0

        lastid = 0
        rlist = queue.get()
        lines = []
        for a in rlist:
            _id = a[1]
            lastid = a[0]
            _t = brand.find_one({"_id": _id}, wanted_fields)
            # NOTE(review): the source was whitespace-collapsed; rows without
            # a matching document are assumed to be skipped rather than
            # written as empty CSV rows - confirm.
            if _t:
                lines.append([
                    lastid,
                    _id,
                    '03',
                    '100',
                    get_inner_str(_t.get("brand_cat_ids")),
                    get_inner_str(_t.get("brand_group_list")),
                    '1',
                    _t.get("is_invalid"),
                    0,
                    _t.get("prod_name"),
                    0,
                    0,
                ])

        write_to_csv(lines)
        print('write to csv finished ,lastid:' + str(lastid))
def GetData(filepath):
    """Walk ``filepath`` recursively and pass every regular file to ``write_to_db``.

    Args:
        filepath: directory to scan. A trailing path separator is accepted
            but no longer required.

    Returns:
        None. A failure on one file is printed and logged through
        ``libes2.writeLog`` and the walk continues with the next entry. Any
        other failure (for example an unreadable directory) is printed and
        ends the scan of that directory only.
    """
    try:
        for fd in os.listdir(filepath):
            # os.path.join instead of ``filepath + fd``: plain concatenation
            # silently produced a wrong path (and treated every file as a
            # directory) whenever the caller omitted the trailing separator.
            newpath = os.path.join(filepath, fd)
            if os.path.isfile(newpath):
                try:
                    write_to_db(newpath)
                except Exception as e:
                    print(e)
                    libes2.writeLog('ERROR WHEN INSERT: CAUGHT FILE:' +
                                    newpath + ",ERROR:" + str(e))
            else:
                # Anything that is not a regular file is descended into.
                GetData(newpath + "/")
                libes2.writeLog('update folder finished ' + newpath)
    except Exception as e:
        print(e)
queue.put(data) print str(os.getpid()) + '(put):' + str(time.time( )) + ".lastid:" + str(lastid) + ",rows:" + str(icount) except Exception, e: print e libes2.writeLog('T_PROD error lastid:' + str(lastid) + ',ERROR:' + str(e)) finally: cur.close() conn.close() if not lastid_new == lastid: lastid = lastid_new elif icount > 18000000: libes2.writeLog('T_PROD finished lastid:' + str(lastid)) def get_inner_str(t): if t is None or str(t) == "" or len(str(t)) == 0: return "0" elif type(t) == list: return ",".join(t) else: return str(t) def get_str(a): if a is None: return '' else: return str(a).replace('\r', '').replace('\n', '<br/>')
def write_to_db(fullpath): print 'start handling path:' + fullpath reader = UnicodeReader(open(fullpath)) #conn=MySQLdb.connect(host='localhost',user='******',passwd='000000',port=3306,db="zi") #conn.set_character_set('utf8') #cur = conn.cursor() try: i = 1 tmpdic = {} idcol = 1 rlist = [] for line in reader: #line.decode('gb3212').encode('utf-8') if i == 1: ''' with open('./alltales_head_01.txt','ab') as f: f.write(','.join(line) +"\r\n") break ''' COL_N = 0 for th in line: if globalDic.has_key(th): DB_N = str(globalDic[th]) tmpdic[DB_N] = COL_N if globalDic[th] == '1': idcol = COL_N COL_N += 1 print 'idcol:' + str(idcol) i += 1 continue tlist = [] for DB_N in range(24): #the number of the db want. str1 = None if tmpdic.has_key(str(DB_N)): COL_N = tmpdic[str(DB_N)] str1 = line[COL_N].replace('\n', '').replace('\r', '<br/>') if DB_N == 0: tlist.append( str(hashlib.md5(line[idcol]).hexdigest()) + str(len(line[idcol]))) elif DB_N == 22: tlist.append(fullpath) #remark elif DB_N == 16: tlist.append(getnumbers(str1)) elif DB_N == 20: tlist.append(getdate_temp(str1)) elif DB_N > 16 and DB_N < 20: tlist.append(getlastnumbers(str1)) else: tlist.append(str1) #add region tlist.append(get_geo(tlist[5], tlist[12], tlist[8])) #print DB_N #print tlist try: ''' strsql = 'insert into zi_common_org(org_id,org_name_cn,org_desc,uname_contact,uposition_contact,Extel,Tel,Fax,mobile,email,uname_holder,org_addr_cn,postcode,org_gov_bzscope,ZIIndustryDesc,org_gov_type,org_gov_bzmode,registered_capital,org_revenues,org_pers_count,date_foundation,region_temp_str,remark,website,geo_region_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)' cur.execute(strsql,tlist) conn.commit() ''' rlist.append(tlist) if i % 10000 == 0: write_to_csv(rlist) except Exception, e: print e libes2.writeLog('insert db error:' + str(line[2]) + ',ERROR:' + str(e)) finally: #print 'handled......rows:'+str(i) i += 1
port=3306, db="zi") strsql = 'select region_id,Areacode,postcode,MobileNumber from dm_mobile' conn.set_character_set('utf8') cur = conn.cursor() try: cur.execute(strsql) data = cur.fetchall() for a in data: if not globalMoblieDic.has_key(a[3].strip()): globalMoblieDic[a[3].strip()] = a[0].strip() if not globalExtelDic.has_key(a[1].strip()): globalExtelDic[a[1].strip()] = a[0].strip() if not globalPostDic.has_key(a[2].strip()): globalPostDic[a[2].strip()] = a[0].strip() del data except Exception, e: print e libes2.writeLog('t_db error,ERROR:' + str(e)) finally: cur.close() conn.close() # input processes filepath = u"./alixls/" GetData(filepath) #print getdate_temp(u"2005 年")
lastid_new = data[-1][0] queue.put(data) print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid) except Exception, e: print e libes2.writeLog('t_image error lastid:' + str(lastid)+',ERROR:'+str(e)) finally: cur.close() conn.close() if not lastid_new == lastid: lastid = lastid_new elif lastid==16283491: libes2.writeLog('t_image finished lastid:' + str(lastid)) def get_inner_str(t): if t is None or str(t)=="" or len(str(t))==0: return "0" elif type(t)== list: return ",".join(t) else: return str(t) # output worker def outputQ(queue,lock):