Exemplo n.º 1
0
def GetData(queue):

    lastid ='0'
    page_size=1000;
    icount = 0
    while True:
        print 'start handling lastid....'+str(lastid)
        
        lastid_new = lastid

        conn=MySQLdb.connect(host='10.10.107.4',user='******',passwd='000000',port=3306,db="zi")

        if queue.full():
            time.sleep(3)
            continue


        strsql = 'select * from T_PROD where prod_type = 1 and prod_id  > %s limit 0, %s'
        conn.set_character_set('utf8')
        cur = conn.cursor()
        try:
            cur.execute(strsql,(lastid,page_size))
            data =cur.fetchall()
            if data:
                lastid_new = data[-1][0]
                icount = icount+len(data)
                queue.put(data)
                print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid)+",rows:"+str(icount)

        except Exception, e:
            print e
            libes2.writeLog('T_PROD error lastid:' + str(lastid)+',ERROR:'+str(e))
        finally:
Exemplo n.º 2
0
def write_to_csv(lines):
    """Append *lines* as rows to ``./all_patents.csv`` with every field quoted.

    Args:
        lines: iterable of rows; each row is a sequence of field values
            accepted by ``csv.writer.writerows``.

    A failure while writing is printed and reported through
    ``libes2.writeLog`` instead of being raised. A failure to open the file
    still propagates to the caller.
    """
    # The file is opened in binary append mode, as in the original code.
    # The ``with`` block guarantees the handle is flushed and closed after
    # every call instead of being left to the garbage collector.
    with open('./all_patents.csv', "ab") as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        try:
            writer.writerows(lines)
        except Exception as e:
            print(e)
            libes2.writeLog('write to file db error:,ERROR:' + str(e))
Exemplo n.º 3
0
def GetData(queue):

    lastid =0
    page_size=1000;
    while True:
        print 'start handling lastid....'+str(lastid)
        
        lastid_new = lastid

        conn=MySQLdb.connect(host='10.10.100.31',user='******',passwd='xmmiou8015',port=3306,db="trademark")

        if queue.full():
            time.sleep(3)
            continue


        strsql = 'select int_id,str_reg_id from t_image where int_id > %s limit 0, %s'
        conn.set_character_set('utf8')
        cur = conn.cursor()
        try:
            cur.execute(strsql,(lastid,page_size))
            data =cur.fetchall()
            if data:
                lastid_new = data[-1][0]
                
                queue.put(data)
                print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid)

        except Exception, e:
            print e
            libes2.writeLog('t_image error lastid:' + str(lastid)+',ERROR:'+str(e))
        finally:
Exemplo n.º 4
0
def write_to_csv(lines):
    """Append *lines* as rows to ``./org_alixls_db.csv`` with every field quoted.

    Args:
        lines: iterable of rows; each row is a sequence of field values
            accepted by ``csv.writer.writerows``.

    A failure while writing is printed and reported through
    ``libes2.writeLog`` instead of being raised. A failure to open the file
    still propagates to the caller.
    """
    # The ``with`` block closes (and therefore flushes) the file after each
    # call; previously the handle was never closed explicitly.
    with open('./org_alixls_db.csv', "ab") as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        try:
            writer.writerows(lines)
        except Exception as e:
            print(e)
            libes2.writeLog('insert db error:,ERROR:' + str(e))
Exemplo n.º 5
0
def write_to_csv(fpath, line):
    """Append a single row to the CSV file at *fpath*, quoting every field.

    Args:
        fpath: path of the CSV file to append to (opened in ``"ab"`` mode).
        line: sequence of field values forming one row.

    A failure while writing is printed and reported through
    ``libes2.writeLog`` instead of being raised. A failure to open the file
    still propagates to the caller.
    """
    # The ``with`` block closes (and therefore flushes) the file after each
    # call; previously the handle was never closed explicitly.
    with open(fpath, "ab") as csv_file:
        writer = csv.writer(csv_file, quoting=csv.QUOTE_ALL)
        try:
            writer.writerow(line)
        except Exception as e:
            print(e)
            libes2.writeLog('insert db error:,ERROR:' + str(e))
Exemplo n.º 6
0
def outputQ(queue,lock):
    """Consume row batches from *queue*, clean selected columns, write to CSV.

    Each item taken from the queue is a sequence of rows. Every row is copied
    to a list, several columns are normalised with ``get_str``, two extra
    columns are appended, and the resulting batch is passed to
    ``write_to_csv``.

    The worker stops after the queue has been found empty on more than 30
    consecutive polls (3 seconds apart).

    Args:
        queue: object exposing ``empty()`` and ``get()``.
        lock: unused; kept so the signature stays compatible with callers.
    """
    # Counts consecutive empty polls. It must live outside the loop: when it
    # was reset on every iteration the exit condition could never be reached.
    idle_polls = 0

    while True:
        if queue.empty():
            if idle_polls > 30:
                # Log before leaving; after ``break`` the call is unreachable.
                libes2.writeLog('all finished')
                break
            time.sleep(3)
            idle_polls += 1
            continue

        # Data arrived, so the idle streak is over.
        idle_polls = 0
        lastid = 0

        rlist = queue.get()
        lines = []

        for a in rlist:
            lastid = a[0]

            line = list(a)

            line[21] = get_str(line[21])
            line.append(line[21])  # region_temp_str

            line[5] = get_str(line[5])

            line[17] = get_str(line[17])
            # Column 16 becomes md5(column 17) followed by column 17's length.
            line[16] = str(hashlib.md5(line[17]).hexdigest()) + str(len(line[17]))

            line[19] = get_str(line[19])

            # A '|' at index 0 is deliberately treated like "no separator"
            # (find() > 0), matching the original condition.
            if line[19].find('|') > 0:
                parts = line[19].split('|')
                line.append(parts[0])  # temp postcode
                line[19] = parts[1]
            else:
                line.append('')

            line[21] = ''
            line[48] = get_str(line[48])  # 48.remark
            line[49] = get_str(line[49])

            lines.append(line)

        write_to_csv(lines)

        print('write to csv finished ,lastid:' + str(lastid))
Exemplo n.º 7
0
def outputQ(queue,lock):
    """Consume (int_id, reg_id) batches, look each up in ``brand``, write CSV.

    Each item taken from the queue is a sequence of rows where ``row[0]`` is
    used as the running last id and ``row[1]`` as the ``_id`` looked up in the
    ``brand`` collection of ``client.zi``. For every document found, a
    12-column output row is built; the batch is then passed to
    ``write_to_csv``.

    The worker stops after the queue has been found empty on more than 30
    consecutive polls (3 seconds apart).

    Args:
        queue: object exposing ``empty()`` and ``get()``.
        lock: unused; kept so the signature stays compatible with callers.
    """
    # NOTE(review): ``client`` is a module-level name not visible here;
    # presumably a MongoDB client -- confirm.
    db = client.zi
    brand = db.brand

    # Counts consecutive empty polls. It must live outside the loop: when it
    # was reset on every iteration the exit condition could never be reached.
    idle_polls = 0

    while True:
        if queue.empty():
            if idle_polls > 30:
                # Log before leaving; after ``break`` the call is unreachable.
                libes2.writeLog('all finished')
                break
            time.sleep(3)
            idle_polls += 1
            continue

        # Data arrived, so the idle streak is over.
        idle_polls = 0
        lastid = 0

        rlist = queue.get()
        lines = []

        for a in rlist:
            _id = a[1]
            lastid = a[0]

            line = []
            _t = brand.find_one(
                {"_id": _id},
                {'prod_name': 1, 'brand_cat_ids': 1, 'brand_group_list': 1, 'is_invalid': 1})
            if _t:
                line = [
                    lastid,
                    _id,
                    '03',
                    '100',
                    get_inner_str(_t.get("brand_cat_ids")),
                    get_inner_str(_t.get("brand_group_list")),
                    '1',
                    _t.get("is_invalid"),
                    0,
                    _t.get("prod_name"),
                    0,
                    0,
                ]

            # NOTE(review): when no document is found an empty row is still
            # appended (unchanged behaviour) -- confirm this is intended.
            lines.append(line)

        write_to_csv(lines)

        print('write to csv finished ,lastid:' + str(lastid))
Exemplo n.º 8
0
def GetData(filepath):
    """Recursively walk *filepath* and pass every regular file to write_to_db.

    Args:
        filepath: directory to scan. A trailing path separator is optional;
            entries are joined with ``os.path.join`` so both forms work.

    A failure while importing a single file is printed and logged through
    ``libes2.writeLog`` and the walk continues. Any other failure (for
    example the directory cannot be listed) is printed and ends the scan of
    this directory without raising.
    """
    try:
        for fd in os.listdir(filepath):
            # os.path.join inserts a separator only when filepath lacks one,
            # so paths built from "dir/" are unchanged and "dir" now works.
            newpath = os.path.join(filepath, fd)

            if os.path.isfile(newpath):
                try:
                    write_to_db(newpath)
                except Exception as e:
                    print(e)
                    libes2.writeLog('ERROR WHEN INSERT: CAUGHT FILE:' +
                                    newpath + ",ERROR:" + str(e))
            else:
                # Anything that is not a regular file is treated as a
                # sub-directory and scanned in turn.
                GetData(newpath + "/")

            # Written for every entry, files as well as folders.
            libes2.writeLog('update folder finished ' + newpath)

    except Exception as e:
        print(e)
Exemplo n.º 9
0
                queue.put(data)
                print str(os.getpid()) + '(put):' + str(time.time(
                )) + ".lastid:" + str(lastid) + ",rows:" + str(icount)

        except Exception, e:
            print e
            libes2.writeLog('T_PROD error lastid:' + str(lastid) + ',ERROR:' +
                            str(e))
        finally:
            cur.close()
            conn.close()

        if not lastid_new == lastid:
            lastid = lastid_new
        elif icount > 18000000:
            libes2.writeLog('T_PROD finished lastid:' + str(lastid))


def get_inner_str(t):
    """Return a string form of *t* for CSV output.

    Args:
        t: any value; lists are flattened to a comma-separated string.

    Returns:
        ``"0"`` when *t* is None or its string form is empty, the items
        joined with ``","`` when *t* is a list (an empty list gives ``""``),
        otherwise ``str(t)``.
    """
    if t is None or str(t) == "":
        return "0"

    if isinstance(t, list):
        try:
            # Fast path: every item is already a string.
            return ",".join(t)
        except TypeError:
            # Lists holding non-string items (e.g. numeric ids) used to
            # raise here; convert each item instead.
            return ",".join(str(item) for item in t)

    return str(t)


def get_str(a):
    """Return *a* as a single-line string; ``None`` becomes ``''``.

    Carriage returns are removed and every newline is replaced with the
    literal ``<br/>`` so the value cannot break a CSV row.
    """
    if a is None:
        return ''

    text = str(a)
    without_cr = text.replace('\r', '')
    return without_cr.replace('\n', '<br/>')
Exemplo n.º 10
0
def write_to_db(fullpath):
    print 'start handling path:' + fullpath

    reader = UnicodeReader(open(fullpath))

    #conn=MySQLdb.connect(host='localhost',user='******',passwd='000000',port=3306,db="zi")
    #conn.set_character_set('utf8')
    #cur = conn.cursor()
    try:

        i = 1
        tmpdic = {}
        idcol = 1
        rlist = []
        for line in reader:

            #line.decode('gb3212').encode('utf-8')

            if i == 1:
                '''
              with open('./alltales_head_01.txt','ab') as f:
                  f.write(','.join(line) +"\r\n")
              break
              '''
                COL_N = 0
                for th in line:
                    if globalDic.has_key(th):

                        DB_N = str(globalDic[th])

                        tmpdic[DB_N] = COL_N

                        if globalDic[th] == '1':
                            idcol = COL_N
                    COL_N += 1

                print 'idcol:' + str(idcol)

                i += 1
                continue

            tlist = []

            for DB_N in range(24):  #the number of the db want.

                str1 = None
                if tmpdic.has_key(str(DB_N)):
                    COL_N = tmpdic[str(DB_N)]
                    str1 = line[COL_N].replace('\n', '').replace('\r', '<br/>')

                if DB_N == 0:
                    tlist.append(
                        str(hashlib.md5(line[idcol]).hexdigest()) +
                        str(len(line[idcol])))
                elif DB_N == 22:
                    tlist.append(fullpath)  #remark
                elif DB_N == 16:
                    tlist.append(getnumbers(str1))
                elif DB_N == 20:
                    tlist.append(getdate_temp(str1))
                elif DB_N > 16 and DB_N < 20:
                    tlist.append(getlastnumbers(str1))
                else:
                    tlist.append(str1)

            #add region
            tlist.append(get_geo(tlist[5], tlist[12], tlist[8]))

            #print DB_N

            #print tlist

            try:
                '''
              strsql = 'insert into zi_common_org(org_id,org_name_cn,org_desc,uname_contact,uposition_contact,Extel,Tel,Fax,mobile,email,uname_holder,org_addr_cn,postcode,org_gov_bzscope,ZIIndustryDesc,org_gov_type,org_gov_bzmode,registered_capital,org_revenues,org_pers_count,date_foundation,region_temp_str,remark,website,geo_region_id) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)'

              cur.execute(strsql,tlist)

              conn.commit()
              '''
                rlist.append(tlist)
                if i % 10000 == 0:
                    write_to_csv(rlist)

            except Exception, e:
                print e
                libes2.writeLog('insert db error:' + str(line[2]) + ',ERROR:' +
                                str(e))
            finally:
                #print 'handled......rows:'+str(i)
                i += 1
Exemplo n.º 11
0
                           port=3306,
                           db="zi")
    strsql = 'select region_id,Areacode,postcode,MobileNumber from dm_mobile'
    conn.set_character_set('utf8')
    cur = conn.cursor()
    try:
        cur.execute(strsql)
        data = cur.fetchall()
        for a in data:
            if not globalMoblieDic.has_key(a[3].strip()):
                globalMoblieDic[a[3].strip()] = a[0].strip()
            if not globalExtelDic.has_key(a[1].strip()):
                globalExtelDic[a[1].strip()] = a[0].strip()
            if not globalPostDic.has_key(a[2].strip()):
                globalPostDic[a[2].strip()] = a[0].strip()

        del data
    except Exception, e:
        print e
        libes2.writeLog('t_db error,ERROR:' + str(e))
    finally:
        cur.close()
        conn.close()

    # input processes
    filepath = u"./alixls/"

    GetData(filepath)

    #print getdate_temp(u"2005 年")
Exemplo n.º 12
0
                lastid_new = data[-1][0]
                
                queue.put(data)
                print str(os.getpid()) + '(put):' + str(time.time())+".lastid:"+str(lastid)

        except Exception, e:
            print e
            libes2.writeLog('t_image error lastid:' + str(lastid)+',ERROR:'+str(e))
        finally:
            cur.close()
            conn.close()

        if not lastid_new == lastid:
            lastid = lastid_new
        elif lastid==16283491:
            libes2.writeLog('t_image finished lastid:' + str(lastid))


def get_inner_str(t):
    """Return a string form of *t* for CSV output.

    Args:
        t: any value; lists are flattened to a comma-separated string.

    Returns:
        ``"0"`` when *t* is None or its string form is empty, the items
        joined with ``","`` when *t* is a list (an empty list gives ``""``),
        otherwise ``str(t)``.
    """
    if t is None or str(t) == "":
        return "0"

    if isinstance(t, list):
        try:
            # Fast path: every item is already a string.
            return ",".join(t)
        except TypeError:
            # Lists holding non-string items (e.g. numeric ids) used to
            # raise here; convert each item instead.
            return ",".join(str(item) for item in t)

    return str(t)


# output worker
def outputQ(queue,lock):