Exemplo n.º 1
0
    def start_requests(self):
        """Yield one validation request per (stored proxy, target URL) pair.

        Proxy ids are read from this spider's own table (``self.name``) and
        then from ``config.httpbin_table``. For every id that resolves to a
        proxy record, one ``Request`` is produced for each URL in
        ``self.urls``; bookkeeping values for the ``success_parse`` /
        ``error_parse`` callbacks travel in ``meta``.

        Yields:
            Request: a request whose ``meta['proxy']`` is the
            ``http://ip:port`` URL of the proxy record being checked.
        """
        # Both id lists are fetched up front so the set of proxies to check is
        # fixed before any request is produced.
        own_ids = utils.get_table_ids(self.sql, self.name)
        free_ids = utils.get_table_ids(self.sql, config.httpbin_table)

        # Walk the id lists themselves instead of indexing them with a
        # separately queried row count: if the two ever disagree the index
        # arithmetic raises IndexError or silently picks the wrong row.
        sources = (
            (self.name, own_ids),
            (config.httpbin_table, free_ids),
        )

        for table, proxy_ids in sources:
            for proxy_id in proxy_ids or ():
                proxy = utils.get_proxy_info(self.sql, table, proxy_id)
                if proxy is None:
                    # No record came back for this id; nothing to validate.
                    continue

                proxy_url = 'http://%s:%s' % (proxy.get('ip'), proxy.get('port'))

                for url in self.urls:
                    yield Request(
                        url=url,
                        headers=self.headers,
                        meta={
                            # Timestamp taken when the request is created.
                            'cur_time': time.time(),
                            'download_timeout': self.timeout,
                            'proxy_info': proxy,
                            'table': table,
                            'id': proxy.get('id'),
                            'proxy': proxy_url,
                            'vali_count': proxy.get('vali_count', 0),
                        },
                        dont_filter=True,
                        callback=self.success_parse,
                        errback=self.error_parse,
                    )
Exemplo n.º 2
0
    def get_unity_version(self, response):
        """Read the Kharma version from *response* and fan out proxy checks.

        The JSON body of *response* is parsed and its ``kharma_version`` field
        (empty string when absent) is sent back as the ``X-Kharma-Version``
        header on every follow-up request. One request to the asset-store
        overview endpoint is produced for each proxy record found in this
        spider's own table (``self.name``) and in ``config.httpbin_table``.

        Args:
            response: the response whose ``body`` holds the JSON document.

        Yields:
            Request: a request whose ``meta['proxy']`` is the
            ``http://ip:port`` URL of the proxy record being checked.
        """
        content = json.loads(response.body)
        utils.log('unity content:%s' % response.body)

        unity_version = content.get('kharma_version', '')

        headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate, br',
            'Accept-Language': 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
            'Connection': 'keep-alive',
            'Host': 'www.assetstore.unity3d.com',
            'Referer': 'https://www.assetstore.unity3d.com/en/',
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:50.0) Gecko/20100101 Firefox/50.0',
            'X-Kharma-Version': unity_version,
            'X-Requested-With': 'UnityAssetStore',
            # NOTE(review): hard-coded session value - presumably captured
            # from a browser session; confirm it is still accepted.
            'X-Unity-Session': '26c4202eb475d02864b40827dfff11a14657aa41',
        }

        # The target URL is the same for every proxy, so build it once.
        url = 'https://www.assetstore.unity3d.com/api/en-US/content/overview/' + '368' + '.json'

        # Both id lists are fetched up front so the set of proxies to check is
        # fixed before any request is produced.
        own_ids = utils.get_table_ids(self.sql, self.name)
        free_ids = utils.get_table_ids(self.sql, config.httpbin_table)

        # Walk the id lists themselves instead of indexing them with a
        # separately queried row count: if the two ever disagree the index
        # arithmetic raises IndexError or silently picks the wrong row.
        sources = (
            (self.name, own_ids),
            (config.httpbin_table, free_ids),
        )

        for table, proxy_ids in sources:
            for proxy_id in proxy_ids or ():
                proxy = utils.get_proxy_info(self.sql, table, proxy_id)
                if proxy is None:
                    # No record came back for this id; nothing to validate.
                    continue

                yield Request(
                    url=url,
                    headers=headers,
                    meta={
                        # Timestamp taken when the request is created.
                        'cur_time': time.time(),
                        'download_timeout': self.timeout,
                        'proxy_info': proxy,
                        'table': table,
                        'id': proxy.get('id'),
                        'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                    },
                    dont_filter=True,
                    callback=self.success_parse,
                    errback=self.error_parse,
                )
Exemplo n.º 3
0
    def start_requests(self):
        """Record the direct-connection origin, then yield proxy checks.

        First fetches ``self.urls[0]`` without a proxy and stores the
        ``origin`` field of its JSON body in ``self.origin_ip``. Then, for
        every proxy record in this spider's own table (``self.name``) and in
        ``config.free_ipproxy_table``, one ``Request`` per URL in
        ``self.urls`` is produced. URLs containing ``https`` are tagged
        ``'https': 'yes'`` in ``meta`` and get priority 0; all others are
        tagged ``'no'`` and get priority 10.

        Yields:
            Request: a request whose ``meta['proxy']`` is the
            ``http://ip:port`` URL of the proxy record being checked.
        """
        # requests applies no timeout unless one is given, so bound this
        # bootstrap call with the same limit used for the proxied downloads;
        # otherwise an unresponsive endpoint blocks spider start-up forever.
        r = requests.get(url=self.urls[0], timeout=self.timeout)
        data = json.loads(r.text)
        self.origin_ip = data.get('origin', '')
        utils.log('origin ip:%s' % self.origin_ip)

        count = utils.get_table_length(self.sql, self.name)
        count_free = utils.get_table_length(self.sql,
                                            config.free_ipproxy_table)

        # Each table is walked with its own 0-based index. Previously the
        # free table was looked up with the combined loop counter, i.e.
        # starting at ``count`` instead of 0, so its leading rows were never
        # requested and the trailing lookups pointed past its length.
        # NOTE(review): assumes get_proxy_info takes a per-table index, as the
        # lookups on the spider's own table imply - confirm against utils.
        sources = (
            (self.name, count),
            (config.free_ipproxy_table, count_free),
        )

        for table, length in sources:
            for index in range(length):
                proxy = utils.get_proxy_info(self.sql, table, index)
                if proxy is None:
                    # No record came back for this index; nothing to validate.
                    continue

                proxy_url = 'http://%s:%s' % (proxy.get('ip'), proxy.get('port'))

                for url in self.urls:
                    https = 'yes' if 'https' in url else 'no'

                    yield Request(
                        url=url,
                        headers=self.headers,
                        dont_filter=True,
                        priority=0 if https == 'yes' else 10,
                        meta={
                            # Timestamp taken when the request is created.
                            'cur_time': time.time(),
                            'download_timeout': self.timeout,
                            'proxy_info': proxy,
                            'table': table,
                            'id': proxy.get('id'),
                            'https': https,
                            'proxy': proxy_url,
                        },
                        callback=self.success_parse,
                        errback=self.error_parse,
                    )
Exemplo n.º 4
0
    def start_requests(self):
        """Yield one product-page request per stored proxy.

        Proxy ids are read from this spider's own table (``self.name``) and
        then from ``config.httpbin_table``. For every id that resolves to a
        proxy record, a URL is picked at random from ``self.urls`` and the
        first run of digits in it is passed along as ``meta['product_id']``.

        Yields:
            Request: a request handled by ``get_comment_count`` on success
            and ``error_parse`` on failure.

        Raises:
            AttributeError: if the chosen URL contains no digits, because the
                regex search then returns ``None``.
        """
        # Raw string avoids the invalid-escape warning for '\d'; compiling
        # once keeps the invariant work out of the loop.
        digits = re.compile(r'\d+', re.S)

        # Both id lists are fetched up front so the set of proxies to check is
        # fixed before any request is produced.
        own_ids = utils.get_table_ids(self.sql, self.name)
        free_ids = utils.get_table_ids(self.sql, config.httpbin_table)

        # Walk the id lists themselves instead of indexing them with a
        # separately queried row count: if the two ever disagree the index
        # arithmetic raises IndexError or silently picks the wrong row.
        sources = (
            (self.name, own_ids),
            (config.httpbin_table, free_ids),
        )

        for table, proxy_ids in sources:
            for proxy_id in proxy_ids or ():
                proxy = utils.get_proxy_info(self.sql, table, proxy_id)
                if proxy is None:
                    # No record came back for this id; nothing to validate.
                    continue

                url = random.choice(self.urls)
                product_id = digits.search(url).group()

                cur_time = time.time()
                self.log('start_request cur_time:%s' % cur_time)
                yield Request(
                    url=url,
                    headers=self.headers,
                    meta={
                        'cur_time': cur_time,
                        'download_timeout': self.timeout,
                        'proxy_info': proxy,
                        'table': table,
                        'id': proxy.get('id'),
                        'proxy': 'http://%s:%s' % (proxy.get('ip'), proxy.get('port')),
                        'vali_count': proxy.get('vali_count', 0),
                        'product_id': product_id,
                    },
                    dont_filter=True,
                    callback=self.get_comment_count,
                    errback=self.error_parse,
                )
Exemplo n.º 5
0
    def start_requests(self):
        """Yield one POST form request per (stored proxy, target URL) pair.

        Proxy ids are read from this spider's own table (``self.name``) and
        then from ``config.httpbin_table``. For every id that resolves to a
        proxy record, one ``FormRequest`` is produced for each URL in
        ``self.urls``, carrying a fixed cookie set and the form fields
        ``first=true``, ``kd=ios``, ``pn=1``.

        Yields:
            FormRequest: a request whose ``meta['proxy']`` is the
            ``http://ip:port`` URL of the proxy record being checked.
        """
        # Both id lists are fetched up front so the set of proxies to check is
        # fixed before any request is produced.
        own_ids = utils.get_table_ids(self.sql, self.name)
        free_ids = utils.get_table_ids(self.sql, config.httpbin_table)

        # Walk the id lists themselves instead of indexing them with a
        # separately queried row count: if the two ever disagree the index
        # arithmetic raises IndexError or silently picks the wrong row.
        sources = (
            (self.name, own_ids),
            (config.httpbin_table, free_ids),
        )

        for table, proxy_ids in sources:
            for proxy_id in proxy_ids or ():
                proxy = utils.get_proxy_info(self.sql, table, proxy_id)
                if proxy is None:
                    # No record came back for this id; nothing to validate.
                    continue

                proxy_url = 'http://%s:%s' % (proxy.get('ip'), proxy.get('port'))

                for url in self.urls:
                    yield FormRequest(
                        url=url,
                        headers=self.headers,
                        method='POST',
                        meta={
                            # Timestamp taken when the request is created.
                            'cur_time': time.time(),
                            'download_timeout': self.timeout,
                            'proxy_info': proxy,
                            'table': table,
                            'id': proxy.get('id'),
                            'proxy': proxy_url,
                            'vali_count': proxy.get('vali_count', 0),
                        },
                        # NOTE(review): fixed cookie values - presumably
                        # captured from a browser session; confirm they are
                        # still accepted by the target site.
                        cookies={
                            'Hm_lpvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488937030',
                            '_ga': 'GA1.2.40497390.1488937014',
                            'TG-TRACK-CODE': 'search_code',
                            'index_location_city': '%E5%8C%97%E4%BA%AC',
                            'LGRID': '20170308093710-bf6755eb-039f-11e7-8025-525400f775ce',
                            'Hm_lvt_4233e74dff0ae5bd0a3d81c6ccf756e6': '1488881288,1488936799,1488936947,1488937014',
                            'JSESSIONID': 'BDCBB6167F960CE43AF54B75A651F586',
                            'LGSID': '20170308093653-b59316f0-039f-11e7-9229-5254005c3644',
                            'LGUID': '20170308093653-b593185f-039f-11e7-9229-5254005c3644',
                            'user_trace_token': '20170308093654-723efcfac8fb4c28a670d073d5113e02',
                            'SEARCH_ID': '4db4dc3dea1c46b49018ae5421b53ffa',
                        },
                        formdata={
                            'first': 'true',
                            'kd': 'ios',
                            'pn': '1',
                        },
                        dont_filter=True,
                        callback=self.success_parse,
                        errback=self.error_parse,
                    )