Exemplo n.º 1
0
    def prepare_req(self, job, curl, proxies):
        """Stage the HTTP request for ``job`` on ``curl``.

        The base ``AioRunner.prepare_req`` is consulted first; any value
        other than ``None`` that it returns is handed back unchanged and
        nothing else is done.

        Otherwise ``curl.prepare_req`` is called with the job's URL, an
        empty header dict and the given ``proxies``, and ``True`` is
        returned.

        NOTE(review): the previous code did ``url, headers = {}``, which
        always raises ValueError (an empty dict cannot be unpacked into two
        names). This version assumes the target address is stored under
        ``job['url']`` -- confirm against whatever produces the jobs.
        """
        pr = AioRunner.prepare_req(self, job, curl, proxies)
        if pr is not None:
            return pr

        url = job['url']  # assumed location of the target URL; see NOTE above
        headers = {}
        curl.prepare_req(url, headers=headers, proxies=proxies)
        return True
Exemplo n.º 2
0
    def prepare_req(self, job, curl, proxies):
        self.dbg('prepare')
        pa = AioRunner.prepare_req(self, job, curl, proxies)
        if pa is not None:
            return pa

        if 'value' in job:
            url = "https://www.linkedin.com/jobs2/view/%d" % job['value']
        else:
            url = job['url']
        print "[%d] prepare %s proxies=" % (self.idx, url), proxies
        headers={}
        if 'ip.cn' in url:
            headers['User-Agent'] = 'curl/7.20.1'
        curl.prepare_req(url, headers=headers, proxies=proxies)
        return True
Exemplo n.º 3
0
    def prepare_req(self, job, curl, proxies):
        self.dbg('prepare')
        pa = AioRunner.prepare_req(self, job, curl, proxies)
        if pa is not None:
            return pa

        if 'key' in job:
            key = spider.util.utf8str(job['key'])
            url = "http://qichacha.com/search?key=" + quote(key) + "&sType=0"
        else:
            Log.error("Invalid job.===>" + job.__str__())
        print "[%d] prepare %s proxies=" % (self.idx, url), proxies
        headers = {}
        if 'ip.cn' in url:
            headers['User-Agent'] = 'curl/7.20.1'
        curl.prepare_req(url, headers=headers, proxies=proxies)
        return True
Exemplo n.º 4
0
    def prepare_req(self, job, curl, proxies):
        self.dbg('prepare')
        pa = AioRunner.prepare_req(self, job, curl, proxies)
        if pa is not None:
            return pa

        if 'key' in job:
            key = spider.util.utf8str(job['key'])
            url = r"http://www.qixin007.com/search/?key=" + quote(
                key) + "&type=enterprise&source=&isGlobal=Y"
            # url = "http://qichacha.com/search?key=" + quote(key) + "&sType=0"
        else:
            Log.error("Invalid job.===>" + job.__str__())
        print "[%d] prepare %s proxies=" % (self.idx, url), proxies
        headers = {}
        if 'ip.cn' in url:
            headers['User-Agent'] = 'curl/7.20.1'
        if self.__jsl_clearance:
            headers["Cookie"] = "__jsl_clearance=" + self.__jsl_clearance + ";"
        if self.__jsluid:
            headers["Cookie"] += "__jsluid=" + self.__jsluid
        curl.prepare_req(url, headers=headers, proxies=proxies)
        return True