示例#1
0
 def parse(self, response):
     """Turn a JSON response into a ``QianniuItem`` or re-request the URL.

     The body is decoded as JSON.  When its ``code`` field stringifies to
     ``'0'`` an item is yielded carrying the raw body, the response meta and
     the current local date.  For any other code the same URL is requested
     again (with ``self.cookie``) unless the ``msg`` field contains
     ``'login'``.

     If ``msg`` is missing or cannot be searched, the error and the body are
     printed and nothing is re-requested.

     NOTE(review): the retry carries no attempt counter, so a URL that keeps
     returning a non-zero code is requested again indefinitely -- confirm
     this is intended.
     """
     content_json = json.loads(response.body)

     if str(content_json.get('code')) == '0':
         item = QianniuItem()
         item['content'] = response.body
         item['meta'] = response.meta
         item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
         yield item
         return

     msg = content_json.get('msg')
     try:
         # Only the membership test is guarded: it is the one step that fails
         # when the payload has no usable ``msg``.
         login_required = 'login' in msg
     except TypeError as e:
         print(e)
         print(response.body)
         return

     if not login_required:
         yield Request(response.url,
                       callback=self.parse,
                       headers=header,
                       cookies=self.cookie,
                       meta=response.meta,
                       dont_filter=True)
示例#2
0
    def parse(self, response):
        """Emit a ``QianniuItem`` for a successful response, else note a logout.

        The body is decoded as JSON.  If decoding (or reading ``code``) fails,
        a marker line is printed and the same URL is requested again with the
        ``cookie_brand`` cookies taken from the response meta.

        When ``code`` stringifies to ``'0'`` an item is yielded carrying the
        raw body, the response meta and the current local date.  Otherwise, if
        ``msg`` contains ``'login'``, the current local timestamp is stored
        with ``r.hset`` in the ``'cookie_logou'`` hash under the brand name.

        Only decoding triggers the retry.  The ``yield`` is deliberately kept
        outside any ``try`` so that closing the generator cannot be answered
        with another ``yield``, and so that an item-building error is raised
        instead of re-requesting the URL forever.

        NOTE(review): the retry has no attempt limit -- confirm intended.
        NOTE(review): ``'cookie_logou'`` looks like a truncated
        ``'cookie_logout'``; it is kept as-is because other code may read it.
        """
        meta = response.meta
        try:
            content_json = json.loads(response.body)
            code = content_json.get('code')
        except Exception:
            print('*******' * 10)
            yield Request(response.url,
                          callback=self.parse,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta=meta,
                          dont_filter=True)
            return

        if str(code) == '0':
            item = QianniuItem()
            item['content'] = response.body
            item['meta'] = meta
            item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
            yield item
            return

        msg = content_json.get('msg')
        try:
            # A missing/empty msg is simply "not a login problem".
            if msg and 'login' in msg:
                r.hset('cookie_logou', meta.get('brand'),
                       time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        except Exception as e:
            # Best effort: a failure to record the logout must not stop the crawl.
            print(e)
示例#3
0
 def parse(self, response):
     """Emit a ``QianniuItem`` for a successful response; retry undecodable ones.

     The raw body and the decoded ``code`` are printed.  If the body cannot
     be decoded as JSON (or ``code`` cannot be read), the same URL is
     requested again with the ``cookie_brand`` cookies from the response meta.

     When ``code`` stringifies to ``'0'`` an item is yielded carrying the raw
     body, the response meta, the current local date (``dt``) and
     ``self.select_date`` (``data_dt``).  Any other code yields nothing.

     Only decoding triggers the retry.  The ``yield`` is kept outside any
     ``try`` so that closing the generator cannot be answered with another
     ``yield``, and so that an item-building error (for example a missing
     ``select_date``) is raised instead of re-requesting the URL forever.

     NOTE(review): the retry has no attempt limit -- confirm intended.
     """
     meta = response.meta
     content = response.body
     print(content)
     try:
         content_json = json.loads(content)
         code = content_json.get('code')
     except Exception:
         yield Request(response.url,
                       callback=self.parse,
                       headers=header,
                       cookies=meta.get('cookie_brand'),
                       meta=meta,
                       dont_filter=True)
         return

     print(code)
     if str(code) == '0':
         item = QianniuItem()
         item['content'] = response.body
         item['meta'] = meta
         item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
         item['data_dt'] = self.select_date
         yield item
示例#4
0
    def parse(self, response):
        """Emit a ``QianniuItem`` and, for page 1 of '商品效果', the other pages.

        Flow:

        * The body is decoded as JSON.  If decoding (or reading ``code``)
          fails, a marker line is printed and the same URL is requested again
          with the ``cookie_brand`` cookies from the response meta.
        * A ``code`` other than ``'0'``: if ``msg`` contains ``'login'`` the
          current local timestamp is stored with ``r.hset`` in the
          ``'cookie_logou'`` hash under the brand name.  Nothing is yielded.
        * ``code`` equal to ``'0'``: an item with the raw body, the response
          meta and the current local date is yielded.  If, in addition,
          ``meta['cate']`` is ``'商品效果'`` and the URL ends in ``&page=1``,
          ``data.recordCount`` is read and one request per remaining page
          (2000 records per page) is yielded.

        Only decoding triggers the retry, so a pagination problem can no
        longer re-request a URL whose item was already emitted.  Follow-up
        requests carry ``cookie_brand`` in their meta so that a retry of a
        follow-up page still has its cookies.

        NOTE(review): the retry has no attempt limit -- confirm intended.
        NOTE(review): ``'cookie_logou'`` looks like a truncated
        ``'cookie_logout'``; it is kept as-is because other code may read it.
        """
        meta = response.meta
        try:
            content_json = json.loads(response.body)
            code = content_json.get('code')
        except Exception:
            print('*******' * 10)
            yield Request(response.url,
                          callback=self.parse,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta=meta,
                          dont_filter=True)
            return

        if str(code) != '0':
            msg = content_json.get('msg')
            try:
                # A missing/empty msg is simply "not a login problem".
                if msg and 'login' in msg:
                    # HSET creates the hash when it does not exist yet and
                    # overwrites the field when it is already present.
                    r.hset('cookie_logou', meta.get('brand'),
                           time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
            except Exception as e:
                # Best effort: failing to record the logout must not stop the crawl.
                print(e)
            return

        item = QianniuItem()
        item['content'] = response.body
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item

        if meta.get('cate') != '商品效果':
            return

        # Only the first page fans out; a URL without '&page=' has no page
        # number to inspect and is left alone.
        url_list = response.url.split('&page=')
        if len(url_list) < 2 or url_list[1] != '1':
            return

        data = content_json.get('data')
        record_count = data.get('recordCount') if data else None
        if not record_count:
            return

        page_size = 2000
        page_total = -(-int(record_count) // page_size)  # ceiling division
        for page in range(2, page_total + 1):
            yield Request(url_list[0] + '&page=%s' % page,
                          callback=self.parse,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta={'cate': meta.get('cate'),
                                'month': meta.get('month'),
                                'brand': meta.get('brand'),
                                'cookie_brand': meta.get('cookie_brand')},
                          dont_filter=True)
示例#5
0
 def parse_act_detail_hour_live(self, response):
     """Yield one ``QianniuItem`` holding the ``data`` part of the response.

     The body is decoded as JSON and ``response.meta['cate']`` is set to
     ``'parse_act_detail_hour_live'``.  An item (content, meta, current local
     date) is produced only when ``code`` stringifies to ``'0'`` and ``data``
     is non-empty; otherwise nothing is yielded.
     """
     payload = json.loads(response.body)
     status = payload.get('code')

     request_meta = response.meta
     request_meta['cate'] = 'parse_act_detail_hour_live'

     if str(status) != '0':
         return

     data = payload.get('data')
     if not data:
         return

     item = QianniuItem()
     item['content'] = data
     item['meta'] = request_meta
     item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
     yield item
示例#6
0
 def parse_data(self, response):
     """Yield the first page as an item, then requests for the other pages.

     The body is decoded as JSON and ``response.meta['cate']`` is set to
     ``'parse_act_item_live'``.  Nothing is yielded unless ``code``
     stringifies to ``'0'`` and ``data`` is non-empty.

     After the item (content, meta, current local date), ``data.recordCount``
     is read.  With 20 records per page (matching ``pageSize=20`` in the
     URL), one request is yielded for every page from 2 up to the last page
     that actually holds records.  The page total is a ceiling division, so
     an exact multiple of 20 no longer asks for an extra empty page, and a
     count delivered as a string is accepted.

     Follow-up requests go to ``self.parse_act_item_live`` and carry the
     brand, cookies and activity parameters in their meta.
     """
     content_json = json.loads(response.body)
     code = content_json.get('code')
     meta = response.meta
     meta['cate'] = 'parse_act_item_live'

     if str(code) != '0':
         return
     data = content_json.get('data')
     if not data:
         return

     item = QianniuItem()
     item['content'] = data
     item['meta'] = meta
     item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
     yield item

     count = data.get('recordCount')
     if not count:
         return

     page_size = 20  # must stay in sync with 'pageSize=20' below
     total_pages = -(-int(count) // page_size)  # ceiling division
     if total_pages < 2:
         return

     # Everything except the page number is the same for every follow-up URL.
     activity_time = str(meta.get('activity_time'))
     url_head = (
         'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/getItemListOffline.json?activityId='
         + str(meta.get('activityId'))
         + '&itemType=0&device=1&keyword=&dateType=day&dateRange='
         + activity_time + '%7C' + activity_time
         + '&pageSize=20&page='
     )
     url_tail = '&order=desc&orderBy=' + meta.get('orderby')

     for page in range(2, total_pages + 1):
         url = url_head + str(page) + url_tail
         yield scrapy.Request(url,
                              callback=self.parse_act_item_live,
                              headers=header,
                              cookies=meta.get('cookie_brand'),
                              meta={
                                  'url': url,
                                  'brand': meta.get('brand'),
                                  'cookie_brand': meta.get('cookie_brand'),
                                  'd': meta.get('d'),
                                  'activityId': meta.get('activityId'),
                                  'activity_time': meta.get('activity_time'),
                                  'orderby': meta.get('orderby'),
                              },
                              dont_filter=True)
示例#7
0
 def parse_realtime_overview_payamt(self, response):
     """Yield one ``QianniuItem`` holding the ``data`` part of the response.

     The body is decoded as JSON and ``response.meta['cate']`` is set to
     ``'parse_realtime_overview_flow_payamt'``.  An item (content, current
     local date, meta) is produced only when ``code`` stringifies to ``'0'``
     and ``data`` is non-empty; otherwise nothing is yielded.
     """
     payload = json.loads(response.body)
     status = payload.get('code')

     request_meta = response.meta
     request_meta['cate'] = 'parse_realtime_overview_flow_payamt'

     if str(status) != '0':
         return

     data = payload.get('data')
     if not data:
         return

     item = QianniuItem()
     item['content'] = data
     item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
     item['meta'] = request_meta
     yield item
    def parse_item(self, response):
        """Yield the page as an item and, from page 1, requests for the rest.

        The body is decoded as JSON; nothing is yielded when ``data`` is
        empty.  Otherwise an item (content, meta, current local date) is
        yielded.

        When the URL ends in ``&page=1`` and ``data.recordCount`` is set, one
        request per remaining page (20 records per page) is yielded, each
        handled by this same callback.  A URL without ``&page=`` is treated
        as "not the first page" instead of raising ``IndexError``.
        """
        meta = response.meta
        data = json.loads(response.body).get('data')
        if not data:
            return

        # Read the count before handing ``data`` to the consumer inside the
        # item, so later changes to the item cannot affect pagination.
        record_count = data.get('recordCount') if isinstance(data, dict) else None

        item = QianniuItem()
        item['content'] = data
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item

        url_list = response.url.split('&page=')
        if len(url_list) < 2 or url_list[1] != '1':
            return
        if not record_count:
            return

        page_size = 20
        page_total = -(-int(record_count) // page_size)  # ceiling division
        for page in range(2, page_total + 1):
            yield Request(url_list[0] + '&page=%s' % page,
                          callback=self.parse_item,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta={
                              'month': meta.get('month'),
                              'brand': meta.get('brand'),
                              'catename': meta.get('catename'),
                              'cateid': meta.get('cateid')
                          },
                          dont_filter=True)
示例#9
0
    def parse_data(self, response):
        """Yield the first page as an item, then requests for pages 2 and 3.

        The body is decoded as JSON and ``response.meta['cate']`` is set to
        ``'parse_data'``.  Nothing is yielded unless ``code`` stringifies to
        ``'0'`` and ``data`` is non-empty.

        After the item (content, meta, current local date), two follow-up
        requests for pages 2 and 3 are yielded unconditionally; the page
        range is fixed rather than derived from a record count.  They are
        handled by ``self.parse_act_item``.
        """
        url_template = 'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/getItemListLive.json?activityId=%s&itemType=0&device=1&keyword=&pageSize=20&page=%s&order=desc&orderBy=%s'

        payload = json.loads(response.body)
        status = payload.get('code')

        meta = response.meta
        meta['cate'] = 'parse_data'

        if str(status) != '0':
            return

        data = payload.get('data')
        if not data:
            return

        item = QianniuItem()
        item['content'] = data
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item

        for page in xrange(2, 4):
            page_url = url_template % (str(meta.get('activityId')),
                                       str(page),
                                       meta.get('orderby'))
            # Each request gets its own meta dict.
            follow_meta = {
                'brand': meta.get('brand'),
                'cookie_brand': meta.get('cookie_brand'),
                'd': meta.get('d'),
                'activityId': meta.get('activityId'),
                'orderby': meta.get('orderby')
            }
            yield Request(page_url,
                          callback=self.parse_act_item,
                          headers=header,
                          cookies=meta.get('cookie_brand'),
                          meta=follow_meta,
                          dont_filter=True)