Пример #1
0
def doTask(task, param):
    """Crawl one task URL through a random proxy, retrying on failure.

    Fills ``param['id']``, ``param['url']`` and ``param['ename']`` from
    ``task`` and calls ``rp.crawlGetUrl`` up to three times, choosing a fresh
    random proxy from ``proxy.txt`` for every attempt.  On success the task
    is handed to ``completeTask``; otherwise the last result's ``rtData`` is
    annotated with the task details and inserted into ``error_log_1``.
    ``param['conn']`` is committed before returning.

    Args:
        task: indexable; ``task[0]`` is the id, ``task[1]`` the URL suffix
            appended to ``param['preUrlip']`` and ``task[2]`` the name.
        param: dict that must contain ``'conn'`` (object providing
            ``cursor()`` and ``commit()``), ``'preUrlip'`` and, on success,
            ``'iid'``.  It is mutated in place.

    Raises:
        ValueError: if the proxy list read from ``proxy.txt`` is empty.
    """
    proxyList = jTool.getProxy('proxy.txt')
    if not proxyList:
        # Same exception type random.randint(0, -1) used to raise, but with
        # a message that points at the real cause.
        raise ValueError('proxy list is empty')

    param['id'] = str(task[0])
    param['url'] = param['preUrlip'] + task[1]
    param['ename'] = task[2]

    max_attempts = 3  # one initial try plus two retries
    cursor = param['conn'].cursor()
    try:
        for _ in range(max_attempts):
            proxy = str(random.choice(proxyList)).strip()
            result = rp.crawlGetUrl(param['conn'], param, proxy)
            if result['logic']:
                break

        if result['logic']:
            # NOTE(review): 'iid' is not set in this function; presumably the
            # caller supplies it -- confirm it is not a typo for 'id'.
            completeTask(param, param['iid'])
        else:
            # Record the final failed attempt together with the proxy used.
            errorRow = result['rtData']
            errorRow['eid'] = param['id']
            errorRow['url'] = param['url']
            errorRow['ename'] = param['ename']
            errorRow['proxy'] = proxy
            jTool.insertDatai(cursor, 'error_log_1', errorRow)
            print('error record:' + str(param['id']))

        param['conn'].commit()
    finally:
        # Release the cursor even when crawling or the insert raises.
        cursor.close()
Пример #2
0
def operLog(logFileName):
    """Re-check every entry of a crawl log against the raw-data tables.

    Each line of ``logFileName`` is split on ``,`` into ``key:value`` fields
    which are stored in the parameter dict returned by ``makeParam()``.
    Lines with fewer than three comma-separated fields are skipped.  For
    each remaining line, all ten ``enterprise_raw_<n>`` tables are checked
    for the entry's ``eid``:

    * if ``jTool.exsitsRecord`` reports no record, a row is inserted into
      ``error_log`` and the id is printed;
    * if a record exists (and the check did not return ``'error'``) but its
      ``postContent`` is empty, a row is inserted into ``error_log`` and the
      id is printed.

    The connection is committed after each processed line and closed at the
    end.  Finally the number of lines read is printed.

    Args:
        logFileName: path of the log file to read.
    """
    import sys

    with open(logFileName) as logFile:
        param = makeParam()
        param['taskTable'] = 'item_url_task'
        param['num'] = '999'
        conn = param['conn']
        cursor = conn.cursor()
        cursor2 = conn.cursor()
        cursor3 = conn.cursor()
        count = 0
        try:
            for line in logFile:
                count += 1
                fields = line.split(',')
                if len(fields) <= 2:
                    continue

                # Split on the FIRST colon only so values that contain
                # colons themselves (URLs, timestamps) are kept whole.
                for field in fields:
                    key, sep, value = field.partition(':')
                    if sep:
                        param[key.strip()] = value.rstrip('\n')

                # NOTE(review): an entry missing from any single table is
                # logged once per table; confirm that is the intent.
                for i in range(10):
                    table = 'enterprise_raw_' + str(i)
                    ext = jTool.exsitsRecord(cursor, table, 'eid', param['id'])
                    if not ext:
                        jTool.insertDatai(cursor, 'error_log', {
                            'eid': param['id'],
                            'url': param['url'],
                            'ename': param['ename'],
                        })
                        print(str(param['id']))
                    elif ext != 'error':
                        try:
                            # Read from the same table that was just checked.
                            # NOTE(review): the WHERE clause is built by
                            # string concatenation from log-file content;
                            # parameterize it if jTool.getField allows.
                            jTool.getField(cursor2, table, 'postContent',
                                           ' where eid = ' + str(param['id']))
                            val = cursor2.fetchone()
                            if not val[0]:
                                jTool.insertDatai(cursor3, 'error_log', {
                                    'eid': param['id'],
                                    'url': param['url'],
                                    'ename': param['ename'],
                                })
                                print(str(param['id']))
                        except Exception as e:
                            # Best effort: report the failure and continue
                            # with the next table instead of hiding it.
                            sys.stderr.write(
                                'operLog: postContent check failed for %s '
                                'eid %s: %s\n' % (table, param['id'], e))
                conn.commit()
        finally:
            cursor.close()
            cursor2.close()
            cursor3.close()
            conn.close()
    print('line error :' + str(count))