def perform(cls):
    """Drive the shared multi handle and settle the futures of finished transfers.

    Does nothing when ``cls._futures`` is empty. Otherwise:

    1. ``cls._multi.perform()`` is called repeatedly for as long as it
       reports ``pycurl.E_CALL_MULTI_PERFORM``.
    2. ``cls._multi.info_read()`` is drained until it reports zero queued
       messages. Each successful handle has its entry popped from
       ``cls._futures`` and resolved with the ``curl_result`` of the handle
       (tagged with ``id`` and ``state='normal'``). Each failed handle has
       its entry popped and failed with a ``CurlLoop.CurlException`` that
       carries the error code, the message and the annotated result.

    Returns:
        None.
    """
    if not cls._futures:
        return

    # Keep pumping while libcurl asks to be called again straight away.
    status = pycurl.E_CALL_MULTI_PERFORM
    while status == pycurl.E_CALL_MULTI_PERFORM:
        status, _active = cls._multi.perform()

    # Drain the completion queue; the batch read alongside a zero count is
    # still processed before the loop stops.
    queued = None
    while queued != 0:
        queued, finished, failed = cls._multi.info_read()

        for handle in finished:
            future = cls._futures.pop(handle)
            outcome = curl_result(handle)
            outcome['id'] = handle._raw_id
            outcome['state'] = 'normal'
            future.set_result(outcome)

        for handle, code, message in failed:
            print('error:', code, message,
                  handle.getinfo(pycurl.EFFECTIVE_URL))
            outcome = curl_result(handle)
            outcome['url'] = handle._raw_url
            outcome['id'] = handle._raw_id
            outcome['state'] = 'error'
            outcome['error_code'] = code
            outcome['error_desc'] = message
            future = cls._futures.pop(handle)
            future.set_exception(
                CurlLoop.CurlException(code=code, desc=message, data=outcome))
def perform(cls):
    """Drive the shared multi handle and settle the futures of finished transfers.

    Does nothing when ``cls._futures`` is empty. Otherwise:

    1. ``cls._multi.perform()`` is called repeatedly for as long as it
       reports ``pycurl.E_CALL_MULTI_PERFORM``.
    2. ``cls._multi.info_read()`` is drained until it reports zero queued
       messages. Each successful handle has its entry popped from
       ``cls._futures`` and resolved with the ``curl_result`` of the handle,
       tagged with ``url``, ``id``, ``state='normal'``, ``spider='pycurl'``
       and the handle's ``_raw_payload``. Each failed handle has its entry
       popped and failed with a ``CurlLoop.CurlException`` that carries the
       error code, the message and the annotated result
       (``state='error'``).

    Returns:
        None.
    """
    if not cls._futures:
        return

    # Keep pumping while libcurl asks to be called again straight away.
    status = pycurl.E_CALL_MULTI_PERFORM
    while status == pycurl.E_CALL_MULTI_PERFORM:
        status, _active = cls._multi.perform()

    # Drain the completion queue; the batch read alongside a zero count is
    # still processed before the loop stops.
    queued = None
    while queued != 0:
        queued, finished, failed = cls._multi.info_read()

        for handle in finished:
            future = cls._futures.pop(handle)
            outcome = curl_result(handle)
            outcome['url'] = handle._raw_url
            outcome['id'] = handle._raw_id
            outcome['state'] = 'normal'
            outcome['spider'] = 'pycurl'
            outcome['payload'] = handle._raw_payload
            # NOTE: an optional per-payload ``post_func`` hook used to
            # transform the result here; it is currently disabled.
            future.set_result(outcome)

        for handle, code, message in failed:
            print('error:', code, message,
                  handle.getinfo(pycurl.EFFECTIVE_URL))
            outcome = curl_result(handle)
            outcome['url'] = handle._raw_url
            outcome['id'] = handle._raw_id
            outcome['state'] = 'error'
            outcome['spider'] = 'pycurl'
            outcome['error_code'] = code
            outcome['error_desc'] = message
            outcome['payload'] = handle._raw_payload
            # NOTE: the same ``post_func`` hook is disabled on the error
            # path as well.
            future = cls._futures.pop(handle)
            future.set_exception(
                CurlLoop.CurlException(code=code, desc=message, data=outcome))
def get_it(payload):
    """Fetch one payload synchronously with a fresh ``pycurl.Curl`` handle.

    Args:
        payload: Request description exposing ``.get('url')`` and
            ``.get('id')``. If it is exactly a ``list``, only its first
            element is used.

    Returns:
        The object produced by ``curl_result`` for the handle, annotated
        with ``url``, ``id``, ``spider='pycurl'`` and a ``state``:

        * ``'normal'``   - the transfer completed; ``payload`` is attached.
        * ``'error'``    - ``pycurl.error`` was raised; ``error_code`` and
          ``error_desc`` come from the exception arguments.
        * ``'abnormal'`` - as ``'error'``, but the code is 18 or 47.
        * ``'critical'`` - any other exception; ``error_code`` is ``'-1'``.

        The handle is always closed before returning.
    """
    if type(payload) is list:
        payload = payload[0]

    def _annotated(state):
        # Snapshot the handle and attach the fields shared by every outcome.
        snapshot = curl_result(handle)
        snapshot['url'] = payload.get('url')
        snapshot['id'] = payload.get('id')
        snapshot['state'] = state
        snapshot['spider'] = 'pycurl'
        return snapshot

    handle = pycurl.Curl()
    body_buf = BytesIO()
    header_info = {'count': 0, 'content': [{}]}
    try:
        setup_curl_for_get(handle, payload, body_buf, header_info)
        handle.perform()
        resp = _annotated('normal')
        resp['payload'] = payload
        # presumably fills resp from the collected body/headers -- see
        # pycurl_get_resp for the exact contract.
        pycurl_get_resp(body_buf, header_info, payload, resp)
        return resp
    except pycurl.error as err:
        resp = _annotated('error')
        code = err.args[0]
        resp['error_code'] = code
        resp['error_desc'] = err.args[1]
        # libcurl codes 18 (partial file) and 47 (too many redirects) are
        # downgraded from 'error' to 'abnormal'.
        if code in (18, 47):
            resp['state'] = 'abnormal'
        pycurl_get_resp(body_buf, header_info, payload, resp)
        return resp
    except Exception as err:
        resp = _annotated('critical')
        resp['error_code'] = '-1'
        detail = ' '.join((str(err), str(type(err))))
        resp['error_desc'] = 'pycurl re-one exception leaked: ' + detail
        return resp
    finally:
        handle.close()