Пример #1
0
    def build_pool(self):
        """Create the multi handle and fill it with opener handles.

        A new ``pycurl.CurlMulti`` is stored on ``self._openers_pool`` and is
        given a ``handles`` list containing ``self.connections_count``
        openers, each obtained from ``self.get_opener()``.

        :return: the newly created :class:`pycurl.CurlMulti` object.
        """
        self._openers_pool = pycurl.CurlMulti()
        self._openers_pool.handles = []

        # Number of openers to create, as calculated by connections_count.
        total = self.connections_count

        # extend() pulls from the generator one item at a time, so openers are
        # appended to the list in creation order exactly like a manual loop.
        self._openers_pool.handles.extend(
            self.get_opener() for _ in xrange(total))

        logger.info("Created {0} openers".format(total))
        return self._openers_pool
Пример #2
0
def multi_get(wf,
              urls,
              debug=0,
              num_conn=100,
              timeout=5,
              ua=None,
              ref=None,
              percentile=100,
              cf=None,
              follow=1,
              ref_dict=None):
    """Fetch many URLs concurrently with pycurl and store the results in ``wf``.

    URLs are de-duplicated and stripped. Blank entries, entries starting with
    ``#`` and URLs that are already keys of ``wf`` are skipped. Each remaining
    URL ends up in ``wf`` as follows:

    * the response body on success (replaced by ``''`` when it is longer than
      100000 characters);
    * ``'---'`` when the transfer failed, when the URL is longer than 250
      characters, or when it contains ``'.pdf'`` (such URLs are never
      requested).

    :param wf: mapping that receives the results; must support ``keys()`` and
        item assignment.
    :param urls: iterable of URL strings.
    :param debug: when truthy, print progress information.
    :param num_conn: maximum number of concurrent connections; capped at the
        number of queued URLs and required to be within 1..10000.
    :param timeout: value used for both ``CONNECTTIMEOUT`` and ``TIMEOUT``.
    :param ua: User-Agent string; defaults to ``'multi_get'``.
    :param ref: default Referer applied to every request, if truthy.
    :param percentile: stop early once more than this percentage of the URLs
        has been processed.
    :param cf: cookie file used for both ``COOKIEFILE`` and ``COOKIEJAR``.
    :param follow: when truthy, follow redirects (at most 5).
    :param ref_dict: optional mapping of URL -> Referer overriding ``ref`` for
        individual URLs.
    :return: ``None``; results are delivered through ``wf``.
    :raises AssertionError: if the effective connection count is outside
        1..10000.
    """
    from collections import deque

    if ua is None:
        ua = 'multi_get'

    # Snapshot of the keys present before this call; used to skip known URLs.
    known = set(wf.keys())

    # deque gives O(1) pops from the left while scheduling.
    queue = deque()
    for url in dict.fromkeys(urls):
        url = url.strip()
        if len(url) > 250:
            wf[url] = '---'
            continue
        if not url or url[0] == "#" or url in known:
            continue
        queue.append((url, "[%03d]" % (len(queue) + 1)))

    if not queue:
        return

    num_urls = len(queue)
    num_conn = min(num_conn, num_urls)
    assert 1 <= num_conn <= 10000, "invalid number of concurrent connections"
    if debug:
        print("PycURL %s (compiled against 0x%x)" % (
            pycurl.version, pycurl.COMPILE_LIBCURL_VERSION_NUM))
        print("----- Getting %s URLs using %s connections -----" % (
            num_urls, num_conn))

    def report(tag, handle):
        # Progress output is best-effort: a failing print must never abort
        # the transfer loop.
        if not debug:
            return
        try:
            print("[%s] %5s %40s" % (tag, handle.filename, handle.url[:40]))
        except Exception:
            pass

    m = pycurl.CurlMulti()
    m.handles = []
    try:
        # Pre-allocate a fixed pool of easy handles that are reused for
        # successive URLs.
        for _ in range(num_conn):
            c = pycurl.Curl()
            if follow:
                c.setopt(pycurl.FOLLOWLOCATION, 1)
                c.setopt(pycurl.MAXREDIRS, 5)
            c.setopt(pycurl.CONNECTTIMEOUT, timeout)
            c.setopt(pycurl.TIMEOUT, timeout)
            c.setopt(pycurl.NOSIGNAL, 1)
            c.setopt(pycurl.USERAGENT, ua)
            if cf:
                c.setopt(pycurl.COOKIEFILE, cf)
                c.setopt(pycurl.COOKIEJAR, cf)
            if ref:
                c.setopt(pycurl.REFERER, ref)
            m.handles.append(c)

        freelist = m.handles[:]
        num_processed = 0
        bailout = False
        while num_processed < num_urls and not bailout:
            # Hand queued URLs to idle handles.
            while queue and freelist:
                url, filename = queue.popleft()
                if '.pdf' in url:
                    # PDFs are never fetched; mark them and shrink the target.
                    wf[url] = '---'
                    num_urls -= 1
                    continue

                c = freelist.pop()
                raw_url = url
                if isinstance(url, type(u'')):
                    url = url.encode('utf8', 'replace')

                # Look the referer up under the caller's key (before encoding)
                # and, for backward compatibility, under the encoded URL too;
                # fall back to the default so a reused handle does not keep
                # the referer of its previous URL.
                # NOTE(review): when neither ref_dict nor ref supplies a
                # value, a reused handle still carries its previous referer;
                # clearing it needs unsetopt support for REFERER - confirm
                # against the installed pycurl.
                referer = ref
                if ref_dict is not None:
                    referer = (ref_dict.get(raw_url) or ref_dict.get(url)
                               or ref)

                c.setopt(pycurl.URL, url)
                c.res = cStringIO.StringIO()
                c.setopt(pycurl.WRITEFUNCTION, c.res.write)
                if referer:
                    c.setopt(pycurl.REFERER, referer)

                m.add_handle(c)
                c.filename = filename
                c.url = url

            # Drive the transfers until curl has no more immediate work.
            while 1:
                ret, _ = m.perform()
                if ret != pycurl.E_CALL_MULTI_PERFORM:
                    break

            # Collect finished transfers and recycle their handles.
            while 1:
                num_q, ok_list, err_list = m.info_read()
                for c in ok_list:
                    m.remove_handle(c)
                    text = c.res.getvalue()
                    if len(text) > 100000:
                        text = ''
                    wf[c.url] = text
                    report(" ok", c)
                    freelist.append(c)
                for c, _errno, _errmsg in err_list:
                    m.remove_handle(c)
                    report("err", c)
                    wf[c.url] = '---'
                    freelist.append(c)
                num_processed += len(ok_list) + len(err_list)
                if num_urls and \
                        float(num_processed) / num_urls * 100 > percentile:
                    bailout = True
                    break
                if num_q == 0:
                    break

            if bailout:
                break
            # Wait for socket activity before polling again.
            m.select(1.0)
    finally:
        # Always release the curl resources, including on early bailout or
        # when an exception interrupts the loop.
        for c in m.handles:
            c.close()
        m.close()
Пример #3
0
class KKRequest(object):
    _curl_options = {
        "GET": pycurl.HTTPGET,
        "POST": pycurl.POST,
        "PUT": pycurl.PUT,
        "HEAD": pycurl.NOBODY,
    }

    SUPPORTED_METHODS = ("GET", "HEAD", "POST", "DELETE", "PUT", "OPTIONS")

    _multi_curl = pycurl.CurlMulti()
    _multi_curl_map = {}
    _multi_curl_request_info = []

    @classmethod
    def http_do_request(cls, method, url, headers = None, data = None, of = None, session=None):
        """Perform one HTTP request through ``requests`` and return its outcome.

        :param method: HTTP method name passed to ``Session.request``.
        :param url: target URL.
        :param headers: optional request headers.
        :param data: optional request body, forwarded to ``Session.request``.
        :param of: accepted for interface compatibility; not used here.
        :param session: optional session to reuse. When omitted, a temporary
            ``requests.Session`` is created and closed before returning.
        :return: ``(status_code, content)`` on success, or ``(-1, words)``
            where ``words`` is the error message split on spaces.
        """
        owns_session = session is None
        try:
            if owns_session:
                session = requests.Session()
            response = session.request(method, url, headers=headers,
                                       data=data, timeout=120)
            return (response.status_code, response.content)
        except Exception as e:
            # Boundary handler: callers receive failures as a (-1, ...) tuple
            # instead of an exception.
            return (-1, str(e).split(' '))
        finally:
            # Only close sessions created here; a caller-supplied session
            # stays open for reuse.
            if owns_session and session is not None:
                session.close()

    @classmethod
    def make_pycurl(cls, method, url, headers = None, data = None):
        """Build a configured ``pycurl.Curl`` handle for a single request.

        The handle is only prepared here; nothing is transferred until the
        caller runs ``perform()`` on it.

        :param method: HTTP method name. Methods listed in
            ``cls._curl_options`` are enabled through their dedicated curl
            option; other entries of ``cls.SUPPORTED_METHODS`` are sent via
            ``CUSTOMREQUEST``.
        :param url: target URL.
        :param headers: optional dict of request headers; any non-dict value
            is ignored.
        :param data: request body for POST/PUT; ``None`` is sent as an empty
            body.
        :return: ``(curl_handle, header_output, body_output)`` where the two
            outputs are ``BytesIO`` buffers that receive the response headers
            and the response body.
        """
        header_output = BytesIO()
        body_output = BytesIO()

        c = pycurl.Curl()
        c.setopt(pycurl.URL, url)
        c.setopt(pycurl.NOSIGNAL, 1)

        if isinstance(headers, dict):
            c.setopt(pycurl.HTTPHEADER, ["%s: %s" % (capwords(f, "-"), v) for f, v
                    in CaseInsensitiveDict(headers).iteritems()])

        c.setopt(pycurl.CONNECTTIMEOUT, 3)
        c.setopt(pycurl.TIMEOUT, 3)

        if method in cls._curl_options:
            c.setopt(cls._curl_options[method], True)
        elif method in cls.SUPPORTED_METHODS:
            c.setopt(pycurl.CUSTOMREQUEST, method)

        if method in ("POST", "PUT"):
            if data is None:
                data = b""

            body_input = BytesIO(data)
            c.setopt(pycurl.READFUNCTION, body_input.read)

            def ioctl(cmd):
                # Rewind the body so curl can resend it when it asks to
                # restart reading.
                if cmd == pycurl.IOCMD_RESTARTREAD:
                    body_input.seek(0)

            c.setopt(pycurl.IOCTLFUNCTION, ioctl)
            if method == "PUT":
                c.setopt(pycurl.PUT, True)
                c.setopt(pycurl.INFILESIZE, len(data))
            else:
                c.setopt(pycurl.POST, True)
                c.setopt(pycurl.POSTFIELDSIZE, len(data))

        # Capture the response for every method: headers and body go to
        # separate buffers.
        c.setopt(pycurl.HEADERFUNCTION, header_output.write)
        c.setopt(pycurl.WRITEFUNCTION, body_output.write)

        return (c, header_output, body_output)

    @classmethod
    def http_do_request_by_curl(cls, method, url, headers = None, data = None):
        method = method.upper()
        if method not in cls.SUPPORTED_METHODS:
            raise InvalidMethod("cURL do not support %s method" % method.upper())

        try:
            c, header_output, body_output = cls.make_pycurl(method, url, headers, data)

            c.perform()
        except pycurl.error, e:
            error_message = "pycurl error: %s" % str(e)
            return (-1, error_message)
        except Exception, e:
            error_message = "Request Error: %s" % str(e)
            return (-1, error_message)