def doTask(task, param):
    """Crawl a single task through a randomly chosen proxy and record the outcome.

    The crawl is attempted up to three times (one initial try plus two
    retries), each time with a freshly picked proxy from ``proxy.txt``.
    When an attempt reports success, ``completeTask`` is called; otherwise
    the failure details are inserted into the ``error_log_1`` table.

    Args:
        task: Indexable record; ``task[0]`` is used as the id, ``task[1]`` is
            appended to ``param['preUrlip']`` to form the URL, and ``task[2]``
            is stored as the name.
        param: Shared parameter dict. Reads ``'preUrlip'``, ``'conn'`` and
            ``'iid'``; overwrites ``'id'``, ``'url'`` and ``'ename'``.

    Raises:
        ValueError: If the proxy list is empty (the original failed with the
            same exception type, raised from ``random.randint``).
    """
    proxyList = jTool.getProxy('proxy.txt')
    if not proxyList:
        # Fail before touching param or opening a cursor.
        raise ValueError('doTask: no proxies available in proxy.txt')

    param['id'] = str(task[0])
    param['url'] = param['preUrlip'] + task[1]
    param['ename'] = task[2]

    conn = param['conn']
    cursor = conn.cursor()
    try:
        # One initial attempt plus at most two retries, each with a new proxy.
        for _attempt in range(3):
            proxy = str(random.choice(proxyList)).strip()
            result = rp.crawlGetUrl(conn, param, proxy)
            if result['logic']:
                break

        if result['logic']:
            completeTask(param, param['iid'])
        else:
            # Attach the task identity and the last proxy tried to the
            # failure payload before logging it.
            errorRow = result['rtData']
            errorRow['eid'] = param['id']
            errorRow['url'] = param['url']
            errorRow['ename'] = param['ename']
            errorRow['proxy'] = proxy
            jTool.insertDatai(cursor, 'error_log_1', errorRow)
            print('error record:' + str(param['id']))

        conn.commit()
    finally:
        # Release the cursor even when crawling or logging raises.
        cursor.close()
def _parseLogFields(fields, param):
    """Copy ``key:value`` pairs from comma-separated log fields into param.

    Only the first colon separates the key from the value, so values that
    themselves contain colons (such as URLs) are kept whole. Keys are
    stripped of surrounding whitespace and values of newlines.
    """
    for field in fields:
        key, sep, value = field.partition(':')
        if sep:
            param[key.strip()] = value.strip('\n')


def _recordError(cursor, param):
    """Insert the current id/url/ename into ``error_log`` and echo the id."""
    jTool.insertDatai(cursor, 'error_log', {
        'eid': param['id'],
        'url': param['url'],
        'ename': param['ename'],
    })
    print(str(param['id']))


def _recheckRecord(cursor, cursor2, cursor3, param):
    """Log an error row when the current id has no stored ``postContent``.

    Searches ``enterprise_raw_0`` .. ``enterprise_raw_9`` for a row whose
    ``eid`` equals ``param['id']`` and checks ``postContent`` in the table
    where it was found.

    NOTE(review): assumes ``jTool.exsitsRecord`` returns a truthy value when
    the row exists, a falsy value when it does not, and the string
    ``'error'`` when the lookup itself failed -- inferred from how the
    result is compared here; confirm against jTool.
    """
    eid = param['id']
    foundTable = None
    lookupFailed = False
    for shard in range(10):
        table = 'enterprise_raw_' + str(shard)
        ext = jTool.exsitsRecord(cursor, table, 'eid', eid)
        if ext == 'error':
            lookupFailed = True
        elif ext:
            foundTable = table
            break

    if foundTable is None:
        if lookupFailed:
            # At least one table could not be checked, so absence is unproven.
            print('operLog: lookup failed for eid ' + str(eid))
        else:
            _recordError(cursor, param)
        return

    try:
        # NOTE(review): the WHERE clause is string-built because getField
        # takes a raw suffix; eid comes from the log file -- confirm it is
        # trusted before relying on this.
        jTool.getField(cursor2, foundTable, 'postContent',
                       ' where eid = ' + str(eid))
        val = cursor2.fetchone()
        if val is None or not val[0]:
            _recordError(cursor3, param)
    except Exception as e:
        # Best-effort check: report the failure and move on to the next line.
        print('operLog: content check failed for eid ' + str(eid)
              + ': ' + repr(e))


def operLog(logFileName):
    """Re-examine every entry of a log file and re-log those still missing data.

    Each line is split on commas; lines with more than two fields are parsed
    as ``key:value`` pairs into the parameter dict from ``makeParam()`` (the
    ``id``, ``url`` and ``ename`` keys are read afterwards). An ``error_log``
    row is written when the id is absent from all ``enterprise_raw_N`` tables
    or its ``postContent`` is empty. Finally the number of lines read is
    printed.

    NOTE(review): the loop nesting was ambiguous in the source this was
    rebuilt from. This version treats the ten tables as alternatives and
    stops at the first one that contains the id; it also queries
    ``postContent`` from that same table rather than a neighbouring index.
    Confirm this matches the intended table layout.

    Args:
        logFileName: Path of the log file to read.
    """
    with open(logFileName) as logFile:
        param = makeParam()
        param['taskTable'] = 'item_url_task'
        param['num'] = '999'
        conn = param['conn']
        cursor = conn.cursor()
        cursor2 = conn.cursor()
        cursor3 = conn.cursor()
        count = 0
        try:
            for line in logFile:
                count += 1
                fields = line.split(',')
                if len(fields) <= 2:
                    continue
                _parseLogFields(fields, param)
                _recheckRecord(cursor, cursor2, cursor3, param)
                conn.commit()
        finally:
            # Release database resources even if a line fails to process.
            cursor.close()
            cursor2.close()
            cursor3.close()
            conn.close()
    print('line error :' + str(count))