def build_pool(self):
    """Create the multi handle that holds this object's openers.

    A fresh :class:`pycurl.CurlMulti` is stored on ``self._openers_pool``
    and its ``handles`` list is filled with ``self.connections_count``
    openers obtained from ``self.get_opener()``.

    :return: the newly created :class:`pycurl.CurlMulti` object.
    """
    pool = pycurl.CurlMulti()
    self._openers_pool = pool
    pool.handles = []

    # The pool size comes from the (calculated) connections count.
    opener_total = self.connections_count
    for _ in xrange(opener_total):
        opener = self.get_opener()
        pool.handles.append(opener)

    logger.info("Created {0} openers".format(opener_total))
    return self._openers_pool
def multi_get(wf, urls, debug=0, num_conn=100, timeout=5, ua=None, ref=None,
              percentile=100, cf=None, follow=1, ref_dict=None):
    """Fetch many URLs concurrently with pycurl and store the bodies in ``wf``.

    :param wf: mapping that receives the results. ``wf[url]`` is set to the
        response body, to ``''`` when the body is longer than 100000
        characters, or to ``'---'`` when the transfer failed, the URL is
        longer than 250 characters, or the URL contains ``'.pdf'``.
    :param urls: iterable of URL strings. Each one is stripped; blank
        entries, entries starting with ``#``, duplicates and URLs already
        present in ``wf`` are skipped.
    :param debug: when true, print progress lines to stdout.
    :param num_conn: maximum number of concurrent connections; capped to the
        number of queued URLs.
    :param timeout: value used for both ``CONNECTTIMEOUT`` and ``TIMEOUT``.
    :param ua: ``USERAGENT`` value; defaults to ``'multi_get'``.
    :param ref: default ``REFERER`` for every request, if truthy.
    :param percentile: stop early once the percentage of processed URLs is
        strictly greater than this value (the default of 100 never triggers).
    :param cf: cookie file used for both ``COOKIEFILE`` and ``COOKIEJAR``.
    :param follow: when true, follow redirects (at most 5).
    :param ref_dict: optional mapping of URL -> referer overriding ``ref``.
    :return: ``None``; results are delivered through ``wf``.
    """
    from collections import deque  # function-scope import keeps the block self-contained

    if ua is None:
        ua = 'multi_get'

    # Build the work queue in input order, de-duplicating on the stripped URL
    # so that "a" and "a " are not fetched twice.
    known = set(wf.keys())
    seen = set()
    queue = deque()
    for url in urls:
        url = url.strip()
        if url in seen:
            continue
        seen.add(url)
        if len(url) > 250:
            wf[url] = '---'
            continue
        if not url or url[0] == "#" or url in known:
            continue
        filename = "[%03d]" % (len(queue) + 1)
        queue.append((url, filename))
    if not queue:
        return

    num_urls = len(queue)
    num_conn = min(num_conn, num_urls)
    # Kept as an assert so callers still see AssertionError for a bad num_conn.
    assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
    if debug:
        # Single-argument print calls emit the same text on Python 2 and 3.
        print("PycURL %s (compiled against 0x%x)" % (
            pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM))
        print("----- Getting %s URLs using %s connections -----" % (
            num_urls, num_conn))

    m = pycurl.CurlMulti()
    m.handles = []
    try:
        # Pre-allocate a fixed pool of easy handles that is reused for all URLs.
        for _ in range(num_conn):
            c = pycurl.Curl()
            # Register first so the cleanup below sees it even if setopt raises.
            m.handles.append(c)
            if follow:
                c.setopt(pycurl.FOLLOWLOCATION, 1)
                c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, timeout)
            c.setopt(pycurl.TIMEOUT, timeout)
            c.setopt(pycurl.NOSIGNAL, 1)
            c.setopt(pycurl.USERAGENT, ua)
            if cf:
                c.setopt(pycurl.COOKIEFILE, cf)
                c.setopt(pycurl.COOKIEJAR, cf)
            if ref:
                c.setopt(pycurl.REFERER, ref)

        freelist = m.handles[:]
        num_processed = 0
        bailout = False
        while num_processed < num_urls and not bailout:
            # Hand queued URLs to idle handles.
            while queue and freelist:
                url, filename = queue.popleft()
                if '.pdf' in url:
                    # PDFs are never downloaded; record them as failed.
                    wf[url] = '---'
                    num_urls -= 1
                    continue
                c = freelist.pop()
                if isinstance(url, type(u'')):
                    url = url.encode('utf8', 'replace')
                c.setopt(pycurl.URL, url)
                c.res = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, c.res.write)
                if ref_dict is not None:
                    # Options persist on a reused handle, so fall back to the
                    # default referer instead of keeping the previous URL's.
                    # NOTE(review): when ``ref`` is empty a stale per-URL
                    # referer can still remain on the handle -- confirm
                    # whether it should be cleared explicitly.
                    referer = ref_dict.get(url, '') or ref
                    if referer:
                        c.setopt(pycurl.REFERER, referer)
                m.add_handle(c)
                c.filename = filename
                c.url = url

            # Drive the transfers until libcurl no longer asks to be called again.
            while 1:
                ret, num_handles = m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # Collect finished transfers and return their handles to the pool.
            while 1:
                num_q, ok_list, err_list = m.info_read()
                for c in ok_list:
                    m.remove_handle(c)
                    text = c.res.getvalue()
                    if len(text) > 100000:
                        text = ''
                    wf[c.url] = text
                    if debug:
                        try:
                            print("[ ok] %5s %40s" % (c.filename, c.url[:40]))
                        except Exception:
                            # Debug output is best-effort and must not abort the run.
                            pass
                    freelist.append(c)
                for c, errno, errmsg in err_list:
                    m.remove_handle(c)
                    if debug:
                        print("[err] %5s %40s" % (c.filename, c.url[:40]))
                    wf[c.url] = '---'
                    freelist.append(c)
                num_processed += len(ok_list) + len(err_list)
                if num_urls and float(num_processed) / num_urls * 100 > percentile:
                    bailout = True
                    break
                if num_q == 0:
                    break

            # Wait (up to one second) for activity on the transfers.
            m.select(1.0)
    finally:
        # Release every easy handle and the multi handle, also on errors
        # and on early bailout.
        for c in m.handles:
            c.close()
        m.close()
class KKRequest(object):
    """HTTP request helpers built on ``requests`` and ``pycurl``."""

    # Maps an HTTP method name to the pycurl option that selects it.
    _curl_options = {
        "GET": pycurl.HTTPGET,
        "POST": pycurl.POST,
        "PUT": pycurl.PUT,
        "HEAD": pycurl.NOBODY,
    }
    SUPPORTED_METHODS = ("GET", "HEAD", "POST", "DELETE", "PUT", "OPTIONS")

    # Class-level state shared by all users of the class; it is not touched
    # by the methods visible in this part of the file.
    _multi_curl = pycurl.CurlMulti()
    _multi_curl_map = {}
    _multi_curl_request_info = []

    @classmethod
    def http_do_request(cls, method, url, headers=None, data=None, of=None,
                        session=None):
        """Perform a request through ``requests``.

        :param method: HTTP method name passed to ``Session.request``.
        :param url: target URL.
        :param headers: optional headers mapping.
        :param data: optional request body, forwarded to ``Session.request``.
        :param of: accepted for interface compatibility; not used here.
        :param session: optional session to reuse. When omitted, a temporary
            session is created and closed before returning.
        :return: ``(status_code, content)`` on success, or
            ``(-1, words)`` where ``words`` is the error text split on
            spaces when any exception occurs.
        """
        owns_session = session is None
        try:
            if owns_session:
                session = requests.Session()
            response = session.request(method, url, headers=headers,
                                       data=data, timeout=120)
            # ``content`` is read here, before a temporary session is closed.
            return (response.status_code, response.content)
        except Exception as e:
            error_msg = str(e).split(' ')
            return (-1, error_msg)
        finally:
            # Only close a session this call created; a caller-supplied
            # session stays open for reuse.
            if owns_session and session is not None:
                session.close()

    @classmethod
    def make_pycurl(cls, method, url, headers=None, data=None):
        """Build a configured ``pycurl.Curl`` handle for one request.

        :param method: upper-case HTTP method name.
        :param url: target URL.
        :param headers: optional dict of headers; names are normalised with
            ``capwords(name, "-")``.
        :param data: request body for POST/PUT; ``None`` means empty.
        :return: ``(curl, header_output, body_output)`` where the two
            outputs are ``BytesIO`` buffers that receive the response
            headers and the response body respectively.
        """
        header_output = BytesIO()
        body_output = BytesIO()
        c = pycurl.Curl()
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.NOSIGNAL, 1)
        if isinstance(headers, dict):
            c.setopt(pycurl.HTTPHEADER, [
                "%s: %s" % (capwords(f, "-"), v)
                for f, v in CaseInsensitiveDict(headers).items()
            ])
        c.setopt(pycurl.CONNECTTIMEOUT, 3)
        c.setopt(pycurl.TIMEOUT, 3)

        if method in cls._curl_options:
            c.setopt(cls._curl_options[method], True)
        elif method in cls.SUPPORTED_METHODS:
            # Methods without a dedicated option (DELETE, OPTIONS).
            c.setopt(pycurl.CUSTOMREQUEST, method)

        if method in ("POST", "PUT"):
            if data is None:
                data = b""
            body_inout = BytesIO(data)
            c.setopt(pycurl.READFUNCTION, body_inout.read)

            def ioctl(cmd):
                # Rewind the body when libcurl asks to restart reading.
                if cmd == pycurl.IOCMD_RESTARTREAD:
                    body_inout.seek(0)

            c.setopt(pycurl.IOCTLFUNCTION, ioctl)
            if method == "PUT":
                c.setopt(pycurl.PUT, True)
                c.setopt(pycurl.INFILESIZE, len(data))
            else:
                c.setopt(pycurl.POST, True)
                c.setopt(pycurl.POSTFIELDSIZE, len(data))

        c.setopt(pycurl.HEADERFUNCTION, header_output.write)
        # The body goes through WRITEFUNCTION; registering HEADERFUNCTION a
        # second time would send the headers to body_output instead.
        c.setopt(pycurl.WRITEFUNCTION, body_output.write)
        return (c, header_output, body_output)

    @classmethod
    def http_do_request_by_curl(cls, method, url, headers=None, data=None):
        """Perform a request through pycurl.

        :param method: HTTP method name (case-insensitive).
        :param url: target URL.
        :param headers: optional dict of headers.
        :param data: optional request body for POST/PUT.
        :raises InvalidMethod: if ``method`` is not in ``SUPPORTED_METHODS``.
        :return: ``(-1, message)`` when the transfer raises.

        NOTE(review): no success-path return is visible in this part of the
        file; the method may continue beyond it -- confirm.
        """
        method = method.upper()
        if method not in cls.SUPPORTED_METHODS:
            raise InvalidMethod("cURL do not support %s method" % method)
        try:
            c, header_output, body_output = cls.make_pycurl(method, url, headers, data)
            c.perform()
        except pycurl.error as e:
            error_message = "pycurl error: %s" % str(e)
            return (-1, error_message)
        except Exception as e:
            error_message = "Request Error: %s" % str(e)
            return (-1, error_message)