def ping(name, url, grab, thread_number=10):
    """ Do XMLRPC ping of given site.

    Posts the PING_XML payload to every server in SERVER_LIST using
    ``thread_number`` worker threads and returns a list of
    ``(rpc_url, ok)`` tuples, where ``ok`` is True when the server
    replied without an error flag.
    """
    name = smart_str(name)
    url = smart_str(url)

    def worker(rpc_url):
        # Build the XMLRPC request body; values are escaped for XML.
        payload = PING_XML % {
            'url': html.escape(url),
            'name': html.escape(name),
        }
        success = False
        try:
            grab.go(rpc_url, post=payload)
        except Exception as ex:
            logging.error(unicode(ex))
        else:
            # '<boolean>0' in the response means the flerror flag is
            # unset, i.e. the ping was accepted.
            if '<boolean>0' in grab.response.body:
                success = True
            else:
                logging.error('%s : FAIL' % rpc_url)
                logging.error(grab.response.body[:1000])
        return rpc_url, success

    # make_work already yields (rpc_url, ok) pairs from the workers.
    return list(make_work(worker, SERVER_LIST, thread_number))
def build_query_url(query, lang='ru'):
    """Build a suggest URL for *query*, rotating over google domains."""
    params = {
        'hostname': GOOGLE_DOMAINS_ITER.next(),
        'query': urllib.quote_plus(smart_str(query)),
        'lang': lang,
    }
    return BASE_SUGGEST_URL % params
def sanitize_html(html, encoding='utf-8', return_unicode=False):
    """Normalize *html* by parsing and re-rendering it when it contains tags.

    Returns a byte string by default, or unicode when ``return_unicode``
    is true.
    """
    raw = smart_str(html, encoding=encoding)
    if RE_TAG_START.search(raw):
        # Only pay the parse/render cost when markup is actually present.
        raw = render_html(parse_html(raw))
    return raw.decode('utf-8') if return_unicode else raw
def main(lookFor, jobTitle, company, tag): employerHeaderPageId = 1 questionTextPageId = 0 g = Grab() g.go(p(lookFor, jobTitle, company, tag, employerHeaderPageId)) employerHeader = g.xpath('//h1').text_content() f = open('Glassdoor.com ' + employerHeader + '.txt', 'w') f.write(smart_str(employerHeader) + ':\n') while True: g = Grab() questionTextPageId += 1 g.go(p(lookFor, jobTitle, company, tag, questionTextPageId)) if int(g.xpath('//li[@class="currPage"]').text) <= (questionTextPageId - 1): print 'Finished at page: ' + g.xpath('//li[@class="currPage"]').text + '!' break for questionText in g.xpath_list('//p[@class="questionText"]'): f.write(smart_str(questionText.text_content().strip()) + '\n') print 'Page # ' + g.xpath('//li[@class="currPage"]').text + ' parsed!'
def ping(name, url, grab, thread_number=10):
    """ Do XMLRPC ping of given site.

    Posts PING_XML to every server in SERVER_LIST using
    ``thread_number`` threads; returns a list of ``(rpc_url, ok)``
    tuples.
    """
    name = smart_str(name)
    url = smart_str(url)

    def worker(rpc_url):
        post = PING_XML % {
            'url': html.escape(url),
            'name': html.escape(name),
        }
        ok = False
        try:
            grab.go(rpc_url, post=post)
        # FIX: was the python-2-only ``except Exception, ex`` form;
        # use ``as`` like the other copy of this function in the file.
        except Exception as ex:
            logging.error(unicode(ex))
        else:
            # '<boolean>0' means the XMLRPC error flag is unset.
            if not '<boolean>0' in grab.response.body:
                logging.error('%s : FAIL' % rpc_url)
                logging.error(grab.response.body[:1000])
            else:
                ok = True
        return rpc_url, ok

    # FIX: the body was truncated after the except handler; restored
    # the result-collection loop from the full implementation above.
    results = []
    for rpc_url, ok in make_work(worker, SERVER_LIST, thread_number):
        results.append((rpc_url, ok))
    return results
def main(lookFor, jobTitle, company, tag): employerHeaderPageId = 1 questionTextPageId = 0 g = Grab() g.go(p(lookFor, jobTitle, company, tag, employerHeaderPageId)) employerHeader = g.xpath('//h1').text_content() f = open('Glassdoor.com ' + employerHeader + '.txt', 'w') f.write(smart_str(employerHeader) + ':\n') while True: g = Grab() questionTextPageId += 1 g.go(p(lookFor, jobTitle, company, tag, questionTextPageId)) if int(g.xpath('//li[@class="currPage"]').text) <= ( questionTextPageId - 1): print 'Finished at page: ' + g.xpath( '//li[@class="currPage"]').text + '!' break for questionText in g.xpath_list('//p[@class="questionText"]'): f.write(smart_str(questionText.text_content().strip()) + '\n') print 'Page # ' + g.xpath('//li[@class="currPage"]').text + ' parsed!'
def save_list(self, list_name, path):
    """ Save items from list to the file.

    Each item is written on its own line: plain strings are
    byte-encoded as-is, anything else is serialized to JSON.
    """
    with open(path, 'w') as out:
        serialized = [
            smart_str(entry) if isinstance(entry, basestring)
            else json.dumps(entry)
            for entry in self.items.get(list_name, [])
        ]
        out.write('\n'.join(serialized))
def save_list(self, list_name, path):
    """ Save items from list to the file.

    Writes the named list newline-separated: string items go through
    smart_str, non-string items are stored as JSON documents.
    """
    def encode(value):
        # Strings are written verbatim (byte-encoded); everything
        # else becomes a JSON line.
        if isinstance(value, basestring):
            return smart_str(value)
        return json.dumps(value)

    with open(path, "w") as handle:
        handle.write("\n".join(
            encode(v) for v in self.items.get(list_name, [])))
def main(tag): pageId = 0 f = open(tag + '.txt', 'w') f.write(tag + ':\n' ) while True: g = Grab() g.setup(timeout=60, connect_timeout=60) pageId += 1 g.go(p(tag, pageId)) v1 = g.xpath_text('//title') v2 = unicode("Хабрахабр — страница не найдена (404)", 'utf-8') if v1 == v2: print 'Finished at page: ' + str(pageId) + '!' break for questionText in g.xpath_list('//a[@class="post_title"]'): f.write(smart_str(questionText.text_content().strip()) + '\n') print 'Page # ' + str(pageId) + ' parsed!'
def build_search_url(query, page=1, per_page=None, lang='en', filter=True,
                     region=213, **kwargs):
    """ Build yandex search url with specified query and pagination options.

    :param per_page: 10, 20, 30, 50, 100

    213 region is Moscow
    """
    encoded = quote(smart_str(query))
    pieces = ['http://yandex.ru/yandsearch?text=%s&lr=%s' % (encoded, region)]
    if kwargs:
        pieces.append('&' + urlencode(kwargs))
    # Yandex pagination is zero-based.
    pieces.append('&p=%d' % (page - 1))
    return ''.join(pieces)
def main(tag): pageId = 0 f = open(tag + '.txt', 'w') f.write(tag + ':\n') while True: g = Grab() g.setup(timeout=60, connect_timeout=60) pageId += 1 g.go(p(tag, pageId)) v1 = g.xpath_text('//title') v2 = unicode("Хабрахабр — страница не найдена (404)", 'utf-8') if v1 == v2: print 'Finished at page: ' + str(pageId) + '!' break for questionText in g.xpath_list('//a[@class="post_title"]'): f.write(smart_str(questionText.text_content().strip()) + '\n') print 'Page # ' + str(pageId) + ' parsed!'
def build_search_url(query, page=None, per_page=None, lang=None,
                     filter=None, **kwargs):
    """ Build google search url with specified query and pagination options.

    :param per_page: 10, 20, 30, 50, 100

    kwargs:
        tbs=qdr:h
        tbs=qdr:d
        tbs=qdr:w
        tbs=qdr:m
        tbs=qdr:y
    """
    # Resolve defaults for omitted options.
    per_page = 10 if per_page is None else per_page
    page = 1 if page is None else page
    lang = 'en' if lang is None else lang
    filter = True if filter is None else filter

    # Explicit kwargs win over the derived values.
    kwargs.setdefault('hl', lang)
    kwargs.setdefault('num', per_page)
    kwargs.setdefault('start', per_page * (page - 1))
    if 'filter' not in kwargs and not filter:
        kwargs['filter'] = '0'

    url = 'http://google.com/search?q=%s' % quote(smart_str(query))
    if kwargs:
        url += '&' + urlencode(kwargs)
    return url
def build_search_url(query, page=1, per_page=None, lang='en', filter=True,
                     region=213, **kwargs):
    """ Build yandex search url with specified query and pagination options.

    :param per_page: 10, 20, 30, 50, 100

    213 region is Moscow
    """
    url = 'http://yandex.ru/yandsearch?text=%s&lr=%s' % (
        quote(smart_str(query)), region)
    if kwargs:
        url = url + '&' + urlencode(kwargs)
    # The 'p' parameter counts pages from zero.
    return url + '&p=%d' % (page - 1)
def build_search_url(query, page=None, per_page=None, lang=None,
                     filter=None, **kwargs):
    """ Build google search url with specified query and pagination options.

    :param per_page: 10, 20, 30, 50, 100

    kwargs:
        tbs=qdr:h
        tbs=qdr:d
        tbs=qdr:w
        tbs=qdr:m
        tbs=qdr:y
    """
    if page is None:
        page = 1
    if per_page is None:
        per_page = 10
    if lang is None:
        lang = "en"
    if filter is None:
        filter = True

    # Derived query parameters; caller-supplied kwargs take precedence.
    derived = {
        "hl": lang,
        "num": per_page,
        "start": per_page * (page - 1),
    }
    for key, value in derived.items():
        if key not in kwargs:
            kwargs[key] = value
    if "filter" not in kwargs and not filter:
        kwargs["filter"] = "0"

    url = "http://google.com/search?q=%s" % quote(smart_str(query))
    if kwargs:
        url += "&" + urlencode(kwargs)
    return url
def get_url(url):
    """Shorten *url* via the clck.ru service and return the response body."""
    shortener = Grab()
    shortener.go('http://clck.ru/--?url=%s' % quote(smart_str(url)))
    return shortener.response.body
def build_query_url(query):
    """Return the suggest URL for *query* (URL-encoded byte string)."""
    return BASE_SUGGEST_URL % {
        'query': urllib.quote_plus(smart_str(query)),
    }
def quote(data):
    """URL-quote *data* after byte-encoding it (spaces become ``+``)."""
    encoded = smart_str(data)
    return urllib.quote_plus(encoded)
def process_config(self, grab):
    """ Setup curl instance with values from ``self.config``.

    Translates the grab config dict into pycurl ``setopt`` calls:
    URL, redirects, timeouts, body destination, user agent, SSL,
    request method/payload, headers, cookies, proxy and auth.
    """
    # Copy some config for future usage
    # (read later by the head/body processor callbacks).
    self.config_nobody = grab.config['nobody']
    self.config_body_maxsize = grab.config['body_maxsize']

    try:
        request_url = normalize_url(grab.config['url'])
    except Exception as ex:
        raise error.GrabInvalidUrl(
            u'%s: %s' % (unicode(ex), grab.config['url']))

    # py3 hack: on python 2 pycurl needs a byte-string URL
    if not PY3K:
        request_url = smart_str(request_url)
    self.curl.setopt(pycurl.URL, request_url)

    # Redirects, timeouts and DNS resolution.
    self.curl.setopt(pycurl.FOLLOWLOCATION,
                     1 if grab.config['follow_location'] else 0)
    self.curl.setopt(pycurl.MAXREDIRS, grab.config['redirect_limit'])
    self.curl.setopt(pycurl.CONNECTTIMEOUT, grab.config['connect_timeout'])
    self.curl.setopt(pycurl.TIMEOUT, grab.config['timeout'])
    self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    #self.curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)

    if not grab.config['connection_reuse']:
        self.curl.setopt(pycurl.FRESH_CONNECT, 1)
        self.curl.setopt(pycurl.FORBID_REUSE, 1)

    self.curl.setopt(pycurl.NOSIGNAL, 1)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.head_processor)

    # Response body destination: in-memory buffer or a file on disk.
    if grab.config['body_inmemory']:
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)
    else:
        if not grab.config['body_storage_dir']:
            raise error.GrabMisuseError('Option body_storage_dir is not defined')
        self.setup_body_file(grab.config['body_storage_dir'],
                             grab.config['body_storage_filename'])
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)

    if grab.config['verbose_logging']:
        self.verbose_logging = True

    # User-Agent
    if grab.config['user_agent'] is None:
        if grab.config['user_agent_file'] is not None:
            with open(grab.config['user_agent_file']) as inf:
                lines = inf.read().splitlines()
            grab.config['user_agent'] = random.choice(lines)
        else:
            grab.config['user_agent'] = random_user_agent()

    # If value is None then set empty string
    # None is not acceptable because in such case
    # pycurl will set its default user agent "PycURL/x.xx.x"
    if not grab.config['user_agent']:
        grab.config['user_agent'] = ''
    self.curl.setopt(pycurl.USERAGENT, grab.config['user_agent'])

    if grab.config['debug']:
        self.curl.setopt(pycurl.VERBOSE, 1)
        self.curl.setopt(pycurl.DEBUGFUNCTION, self.debug_processor)

    # Ignore SSL errors
    self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)

    # Disabled to avoid SSL3_READ_BYTES:sslv3 alert handshake failure error
    #self.curl.setopt(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv3)

    # Request method and payload.
    if grab.request_method == 'POST':
        self.curl.setopt(pycurl.POST, 1)
        if grab.config['multipart_post']:
            if isinstance(grab.config['multipart_post'], basestring):
                raise error.GrabMisuseError('multipart_post option could not be a string')
            post_items = normalize_http_values(grab.config['multipart_post'],
                                               charset=grab.config['charset'])
            # py3 hack
            if PY3K:
                post_items = decode_pairs(post_items, grab.config['charset'])
            #import pdb; pdb.set_trace()
            self.curl.setopt(pycurl.HTTPPOST, post_items)
        elif grab.config['post']:
            post_data = normalize_post_data(grab.config['post'],
                                            grab.config['charset'])
            # py3 hack
            #if PY3K:
            #    post_data = smart_unicode(post_data, grab.config['charset'])
            self.curl.setopt(pycurl.COPYPOSTFIELDS, post_data)
        else:
            self.curl.setopt(pycurl.POSTFIELDS, '')
    elif grab.request_method == 'PUT':
        data = grab.config['post']
        if isinstance(data, unicode) or (not PY3K and
                                         not isinstance(data, basestring)):
            # py3 hack
            #if PY3K:
            #    data = data.encode('utf-8')
            #else:
            raise error.GrabMisuseError('Value of post option could be only '
                                        'byte string if PUT method is used')
        self.curl.setopt(pycurl.UPLOAD, 1)
        self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
        self.curl.setopt(pycurl.INFILESIZE, len(data))
    elif grab.request_method == 'PATCH':
        data = grab.config['post']
        # NOTE(review): unlike the PUT branch this encodes unicode on
        # py3 and only raises on py2 -- confirm this asymmetry is
        # intended.
        if isinstance(data, unicode) or not isinstance(data, basestring):
            # py3 hack
            if PY3K:
                data = data.encode('utf-8')
            else:
                raise error.GrabMisuseError('Value of post option could be only byte '
                                            'string if PATCH method is used')
        self.curl.setopt(pycurl.UPLOAD, 1)
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'PATCH')
        self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
        self.curl.setopt(pycurl.INFILESIZE, len(data))
    elif grab.request_method == 'DELETE':
        # NOTE(review): lowercase 'delete' while PATCH is uppercase;
        # HTTP methods are case-sensitive -- confirm intended.
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'delete')
    elif grab.request_method == 'HEAD':
        self.curl.setopt(pycurl.NOBODY, 1)
    elif grab.request_method == 'UPLOAD':
        self.curl.setopt(pycurl.UPLOAD, 1)
    elif grab.request_method == 'GET':
        self.curl.setopt(pycurl.HTTPGET, 1)
    else:
        raise error.GrabMisuseError('Invalid method: %s' % grab.request_method)

    # Merge per-request headers over the common set.
    # NOTE(review): this mutates grab.config['common_headers'] in place.
    headers = grab.config['common_headers']
    if grab.config['headers']:
        headers.update(grab.config['headers'])
    header_tuples = [str('%s: %s' % x) for x
                     in headers.items()]
    self.curl.setopt(pycurl.HTTPHEADER, header_tuples)

    self.process_cookie_options(grab, request_url)

    if grab.config['referer']:
        self.curl.setopt(pycurl.REFERER, str(grab.config['referer']))

    # Proxy setup; the empty string clears any previously set proxy on
    # a reused curl handle.
    if grab.config['proxy']:
        self.curl.setopt(pycurl.PROXY, str(grab.config['proxy']))
    else:
        self.curl.setopt(pycurl.PROXY, '')

    if grab.config['proxy_userpwd']:
        self.curl.setopt(pycurl.PROXYUSERPWD, str(grab.config['proxy_userpwd']))

    if grab.config['proxy_type']:
        ptype = getattr(pycurl,
                        'PROXYTYPE_%s' % grab.config['proxy_type'].upper())
        self.curl.setopt(pycurl.PROXYTYPE, ptype)

    if grab.config['encoding']:
        if 'gzip' in grab.config['encoding'] and not 'zlib' in pycurl.version:
            raise error.GrabMisuseError('You can not use gzip encoding because '
                                        'pycurl was built without zlib support')
        self.curl.setopt(pycurl.ENCODING, grab.config['encoding'])

    if grab.config['userpwd']:
        self.curl.setopt(pycurl.USERPWD, str(grab.config['userpwd']))

    if grab.config.get('interface') is not None:
        self.curl.setopt(pycurl.INTERFACE, grab.config['interface'])

    if grab.config.get('reject_file_size') is not None:
        self.curl.setopt(pycurl.MAXFILESIZE, grab.config['reject_file_size'])
def process_config(self, grab):
    """ Setup curl instance with values from ``self.config``.

    Applies every transport-level option from the grab config to the
    underlying pycurl handle: URL, redirect/timeout limits, body
    sink, user agent, SSL, method + payload, headers, cookies, proxy
    and authentication.
    """
    # Copy some config for future usage; the processor callbacks read
    # these during the transfer.
    self.config_nobody = grab.config['nobody']
    self.config_body_maxsize = grab.config['body_maxsize']

    try:
        request_url = normalize_url(grab.config['url'])
    except Exception as ex:
        raise error.GrabInvalidUrl(
            u'%s: %s' % (unicode(ex), grab.config['url']))

    # py3 hack: python 2 pycurl wants bytes for the URL
    if not PY3K:
        request_url = smart_str(request_url)
    self.curl.setopt(pycurl.URL, request_url)

    # Redirect, timeout and DNS behaviour.
    self.curl.setopt(pycurl.FOLLOWLOCATION,
                     1 if grab.config['follow_location'] else 0)
    self.curl.setopt(pycurl.MAXREDIRS, grab.config['redirect_limit'])
    self.curl.setopt(pycurl.CONNECTTIMEOUT, grab.config['connect_timeout'])
    self.curl.setopt(pycurl.TIMEOUT, grab.config['timeout'])
    self.curl.setopt(pycurl.IPRESOLVE, pycurl.IPRESOLVE_V4)
    #self.curl.setopt(pycurl.DNS_CACHE_TIMEOUT, 0)

    if not grab.config['connection_reuse']:
        self.curl.setopt(pycurl.FRESH_CONNECT, 1)
        self.curl.setopt(pycurl.FORBID_REUSE, 1)

    self.curl.setopt(pycurl.NOSIGNAL, 1)
    self.curl.setopt(pycurl.HEADERFUNCTION, self.head_processor)

    # Where the response body goes: memory buffer or file storage.
    if grab.config['body_inmemory']:
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)
    else:
        if not grab.config['body_storage_dir']:
            raise error.GrabMisuseError(
                'Option body_storage_dir is not defined')
        self.setup_body_file(grab.config['body_storage_dir'],
                             grab.config['body_storage_filename'])
        self.curl.setopt(pycurl.WRITEFUNCTION, self.body_processor)

    if grab.config['verbose_logging']:
        self.verbose_logging = True

    # User-Agent
    if grab.config['user_agent'] is None:
        if grab.config['user_agent_file'] is not None:
            with open(grab.config['user_agent_file']) as inf:
                lines = inf.read().splitlines()
            grab.config['user_agent'] = random.choice(lines)
        else:
            grab.config['user_agent'] = random_user_agent()

    # If value is None then set empty string
    # None is not acceptable because in such case
    # pycurl will set its default user agent "PycURL/x.xx.x"
    if not grab.config['user_agent']:
        grab.config['user_agent'] = ''
    self.curl.setopt(pycurl.USERAGENT, grab.config['user_agent'])

    if grab.config['debug']:
        self.curl.setopt(pycurl.VERBOSE, 1)
        self.curl.setopt(pycurl.DEBUGFUNCTION, self.debug_processor)

    # Ignore SSL errors
    self.curl.setopt(pycurl.SSL_VERIFYPEER, 0)
    self.curl.setopt(pycurl.SSL_VERIFYHOST, 0)

    # Disabled to avoid SSL3_READ_BYTES:sslv3 alert handshake failure error
    #self.curl.setopt(pycurl.SSLVERSION, pycurl.SSLVERSION_SSLv3)

    # Configure the HTTP method and attach the payload when present.
    if grab.request_method == 'POST':
        self.curl.setopt(pycurl.POST, 1)
        if grab.config['multipart_post']:
            if isinstance(grab.config['multipart_post'], basestring):
                raise error.GrabMisuseError(
                    'multipart_post option could not be a string')
            post_items = normalize_http_values(
                grab.config['multipart_post'],
                charset=grab.config['charset'])
            # py3 hack
            if PY3K:
                post_items = decode_pairs(post_items,
                                          grab.config['charset'])
            #import pdb; pdb.set_trace()
            self.curl.setopt(pycurl.HTTPPOST, post_items)
        elif grab.config['post']:
            post_data = normalize_post_data(grab.config['post'],
                                            grab.config['charset'])
            # py3 hack
            #if PY3K:
            #    post_data = smart_unicode(post_data, grab.config['charset'])
            self.curl.setopt(pycurl.COPYPOSTFIELDS, post_data)
        else:
            self.curl.setopt(pycurl.POSTFIELDS, '')
    elif grab.request_method == 'PUT':
        data = grab.config['post']
        if isinstance(data, unicode) or (not PY3K and
                                         not isinstance(data, basestring)):
            # py3 hack
            #if PY3K:
            #    data = data.encode('utf-8')
            #else:
            raise error.GrabMisuseError('Value of post option could be only '
                                        'byte string if PUT method is used')
        self.curl.setopt(pycurl.UPLOAD, 1)
        self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
        self.curl.setopt(pycurl.INFILESIZE, len(data))
    elif grab.request_method == 'PATCH':
        data = grab.config['post']
        # NOTE(review): PATCH encodes unicode on py3 but raises on py2,
        # the opposite of the PUT branch above -- verify intended.
        if isinstance(data, unicode) or not isinstance(data, basestring):
            # py3 hack
            if PY3K:
                data = data.encode('utf-8')
            else:
                raise error.GrabMisuseError('Value of post option could be only byte '
                                            'string if PATCH method is used')
        self.curl.setopt(pycurl.UPLOAD, 1)
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'PATCH')
        self.curl.setopt(pycurl.READFUNCTION, StringIO(data).read)
        self.curl.setopt(pycurl.INFILESIZE, len(data))
    elif grab.request_method == 'DELETE':
        # NOTE(review): method string is lowercase here; HTTP method
        # names are case-sensitive -- confirm servers accept it.
        self.curl.setopt(pycurl.CUSTOMREQUEST, 'delete')
    elif grab.request_method == 'HEAD':
        self.curl.setopt(pycurl.NOBODY, 1)
    elif grab.request_method == 'UPLOAD':
        self.curl.setopt(pycurl.UPLOAD, 1)
    elif grab.request_method == 'GET':
        self.curl.setopt(pycurl.HTTPGET, 1)
    else:
        raise error.GrabMisuseError('Invalid method: %s' % grab.request_method)

    # Per-request headers override the common header set.
    # NOTE(review): update() mutates the shared common_headers dict.
    headers = grab.config['common_headers']
    if grab.config['headers']:
        headers.update(grab.config['headers'])
    header_tuples = [str('%s: %s' % x) for x
                     in headers.items()]
    self.curl.setopt(pycurl.HTTPHEADER, header_tuples)

    self.process_cookie_options(grab, request_url)

    if grab.config['referer']:
        self.curl.setopt(pycurl.REFERER, str(grab.config['referer']))

    # Proxy configuration; empty string resets a proxy left over from
    # a previous request on a reused handle.
    if grab.config['proxy']:
        self.curl.setopt(pycurl.PROXY, str(grab.config['proxy']))
    else:
        self.curl.setopt(pycurl.PROXY, '')

    if grab.config['proxy_userpwd']:
        self.curl.setopt(pycurl.PROXYUSERPWD, str(grab.config['proxy_userpwd']))

    if grab.config['proxy_type']:
        ptype = getattr(pycurl,
                        'PROXYTYPE_%s' % grab.config['proxy_type'].upper())
        self.curl.setopt(pycurl.PROXYTYPE, ptype)

    if grab.config['encoding']:
        if 'gzip' in grab.config[
                'encoding'] and not 'zlib' in pycurl.version:
            raise error.GrabMisuseError('You can not use gzip encoding because '
                                        'pycurl was built without zlib support')
        self.curl.setopt(pycurl.ENCODING, grab.config['encoding'])

    if grab.config['userpwd']:
        self.curl.setopt(pycurl.USERPWD, str(grab.config['userpwd']))

    if grab.config.get('interface') is not None:
        self.curl.setopt(pycurl.INTERFACE, grab.config['interface'])

    if grab.config.get('reject_file_size') is not None:
        self.curl.setopt(pycurl.MAXFILESIZE, grab.config['reject_file_size'])