def parse(self, response):
    """Emit a ``QianniuItem`` for a successful response or re-queue the request.

    The response body is decoded as JSON.  When its ``code`` field
    stringifies to ``'0'`` a single item is yielded carrying the raw body,
    the response meta and today's local date (``%Y-%m-%d``).

    For any other code the same URL is requested again with ``self.cookie``
    unless the payload's ``msg`` contains ``'login'``.  A payload whose
    ``msg`` is missing (or is not something ``in`` can search) is only
    printed for diagnosis and is not retried.

    Args:
        response: object exposing ``body``, ``meta`` and ``url``.

    Yields:
        A ``QianniuItem`` on success, otherwise at most one retry ``Request``.

    Raises:
        ValueError: if the body is not valid JSON (``json.loads``).
    """
    content_json = json.loads(response.body)

    if str(content_json.get('code')) == '0':
        item = QianniuItem()
        item['content'] = response.body
        item['meta'] = response.meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item
        return

    msg = content_json.get('msg')
    try:
        mentions_login = 'login' in msg
    except TypeError as e:
        # ``msg`` is None or not a container: nothing to decide on, so dump
        # the payload for inspection instead of retrying.
        print(e)
        print(response.body)
        return

    if mentions_login:
        # A message mentioning "login" is not retried.
        return

    # NOTE(review): dont_filter=True and there is no retry counter, so a URL
    # that keeps failing is re-queued on every response.
    yield Request(response.url, callback=self.parse, headers=header,
                  cookies=self.cookie, meta=response.meta, dont_filter=True)
def parse(self, response):
    """Emit a ``QianniuItem``, flag a login problem, or retry an unreadable body.

    * Body cannot be decoded into a JSON object: a banner line is printed
      and the same URL is requested again with the cookies stored under
      ``meta['cookie_brand']``.
    * ``code`` stringifies to ``'0'``: one item is yielded with the raw
      body, the response meta and today's local date (``%Y-%m-%d``).
    * Any other code: if ``msg`` contains ``'login'`` the current local
      timestamp is written with ``r.hset`` into the ``'cookie_logou'`` hash
      under the brand taken from meta.  Nothing is yielded.

    Only the decode step triggers the retry.  The previous bare ``except:``
    wrapped the ``yield`` as well, so it also caught ``GeneratorExit`` and
    turned any later error into an endless re-request.

    Args:
        response: object exposing ``body``, ``meta`` and ``url``.

    Yields:
        A ``QianniuItem`` on success, or one retry ``Request``.
    """
    meta = response.meta

    try:
        content_json = json.loads(response.body)
        code = content_json.get('code')
    except (ValueError, TypeError, AttributeError):
        # Not JSON, or JSON that is not an object.
        print('*******' * 10)
        yield Request(response.url, callback=self.parse, headers=header,
                      cookies=meta.get('cookie_brand'), meta=meta,
                      dont_filter=True)
        return

    if str(code) == '0':
        item = QianniuItem()
        item['content'] = response.body
        item['meta'] = meta
        item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
        yield item
        return

    msg = content_json.get('msg') or ''
    try:
        if 'login' in msg:
            # The key spelling 'cookie_logou' is kept as-is: it is a runtime
            # key that other code may read.
            r.hset('cookie_logou', meta.get('brand'),
                   time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
    except Exception as e:
        # Best effort: failing to record the flag must not break the crawl.
        print(e)
def parse(self, response):
    """Emit a dated ``QianniuItem`` or retry when the body cannot be decoded.

    The raw body and the decoded ``code`` are printed.  When ``code``
    stringifies to ``'0'`` one item is yielded with the raw body, the
    response meta, today's local date (``dt``) and ``self.select_date``
    (``data_dt``).  Any other code yields nothing.

    If the body cannot be decoded into a JSON object the same URL is
    requested again with the cookies stored under ``meta['cookie_brand']``.
    Only the decode step triggers that retry.  The previous bare
    ``except:`` also wrapped the ``yield``, so it caught ``GeneratorExit``
    and turned unrelated errors into an endless re-request.

    Args:
        response: object exposing ``body``, ``meta`` and ``url``.

    Yields:
        A ``QianniuItem`` on success, or one retry ``Request``.
    """
    meta = response.meta
    content = response.body
    print(content)

    try:
        content_json = json.loads(content)
        code = content_json.get('code')
    except (ValueError, TypeError, AttributeError):
        # Not JSON, or JSON that is not an object.
        yield Request(response.url, callback=self.parse, headers=header,
                      cookies=meta.get('cookie_brand'), meta=meta,
                      dont_filter=True)
        return

    print(code)
    if str(code) != '0':
        return

    item = QianniuItem()
    item['content'] = response.body
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    item['data_dt'] = self.select_date
    yield item
def parse(self, response):
    """Emit a ``QianniuItem`` and, for page 1 of '商品效果', queue the other pages.

    * Body cannot be decoded into a JSON object: a banner line is printed
      and the same URL is requested again with ``meta['cookie_brand']``.
    * ``code`` is not ``'0'``: if ``msg`` contains ``'login'`` the current
      local timestamp is written with ``r.hset`` into the ``'cookie_logou'``
      hash under the brand from meta.  Nothing is yielded.
    * ``code`` is ``'0'``: one item is yielded (raw body, meta, today's
      local date).  If ``meta['cate']`` is ``'商品效果'`` and the URL ends
      in ``&page=1``, ``data.recordCount`` is divided into pages of 2000
      and a request is yielded for every page from 2 upwards.

    Only the decode step triggers the retry.  The previous bare ``except:``
    also covered the pagination code, so a URL without ``&page=`` raised
    ``IndexError`` and was re-requested indefinitely.

    Args:
        response: object exposing ``body``, ``meta`` and ``url``.

    Yields:
        A ``QianniuItem`` followed by zero or more page ``Request`` objects,
        or a single retry ``Request``.
    """
    meta = response.meta

    try:
        content_json = json.loads(response.body)
        code = content_json.get('code')
    except (ValueError, TypeError, AttributeError):
        # Not JSON, or JSON that is not an object.
        print('*******' * 10)
        yield Request(response.url, callback=self.parse, headers=header,
                      cookies=meta.get('cookie_brand'), meta=meta,
                      dont_filter=True)
        return

    if str(code) != '0':
        msg = content_json.get('msg') or ''
        try:
            if 'login' in msg:
                # hset: if the hash does not exist a new one is created and
                # the HSET is performed; if the field already exists in the
                # hash its old value is overwritten.
                r.hset('cookie_logou', meta.get('brand'),
                       time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()))
        except Exception as e:
            # Best effort: failing to record the flag must not break the crawl.
            print(e)
        return

    item = QianniuItem()
    item['content'] = response.body
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    yield item

    if meta.get('cate') != '商品效果':
        return

    url_parts = response.url.split('&page=')
    # Only the first page fans out; a URL without a page parameter never does.
    if len(url_parts) < 2 or url_parts[1] != '1':
        return

    data = content_json.get('data')
    if not data:
        return
    record_count = data.get('recordCount')
    if not record_count:
        return

    page_size = 2000
    full_pages, remainder = divmod(int(record_count), page_size)
    total_pages = full_pages + (1 if remainder > 0 else 0)

    # NOTE(review): the follow-up meta carries no 'cookie_brand', so a retry
    # of one of these pages would be sent with cookies=None -- confirm.
    for page in range(2, total_pages + 1):
        yield Request(url_parts[0] + '&page=%s' % page,
                      callback=self.parse,
                      headers=header,
                      cookies=meta.get('cookie_brand'),
                      meta={'cate': meta.get('cate'),
                            'month': meta.get('month'),
                            'brand': meta.get('brand')},
                      dont_filter=True)
def parse_act_detail_hour_live(self, response):
    """Yield one ``QianniuItem`` built from the ``data`` part of the response.

    The body is decoded as JSON and ``response.meta['cate']`` is set to
    ``'parse_act_detail_hour_live'``.  An item is produced only when the
    payload's ``code`` stringifies to ``'0'`` and ``data`` is truthy; it
    carries that data, the tagged meta and today's local date (``%Y-%m-%d``).

    Args:
        response: object exposing ``body`` and ``meta``.

    Yields:
        At most one ``QianniuItem``.
    """
    payload = json.loads(response.body)
    status = payload.get('code')

    # The tag is written before the status check, so it is set even when
    # nothing ends up being yielded.
    request_meta = response.meta
    request_meta['cate'] = 'parse_act_detail_hour_live'

    if str(status) != '0':
        return

    data = payload.get('data')
    if not data:
        return

    record = QianniuItem()
    record['content'] = data
    record['meta'] = request_meta
    record['dt'] = time.strftime('%Y-%m-%d', time.localtime())
    yield record
def parse_data(self, response):
    """Yield the first-page item and queue requests for the remaining pages.

    The body is decoded as JSON and ``response.meta['cate']`` is set to
    ``'parse_act_item_live'``.  When ``code`` stringifies to ``'0'`` and
    ``data`` is truthy, one ``QianniuItem`` is yielded (data, meta, today's
    local date).  ``data['recordCount']`` is then turned into a page count
    using the same page size of 20 that is sent as ``pageSize`` in the URL,
    and one offline item-list request is yielded for each page from 2 up to
    the last page.

    The page count is a ceiling division.  The previous
    ``int(round(float(count / 20)))`` relied on Python 2 integer division
    and asked for one page too many whenever the count was an exact
    multiple of 20.

    Args:
        response: object exposing ``body`` and ``meta``; meta is read for
            ``activityId``, ``activity_time``, ``orderby``, ``brand``,
            ``cookie_brand`` and ``d``.

    Yields:
        A ``QianniuItem`` followed by zero or more ``scrapy.Request`` objects.

    Raises:
        ValueError: if the body is not valid JSON.
    """
    content_json = json.loads(response.body)
    code = content_json.get('code')
    meta = response.meta
    meta['cate'] = 'parse_act_item_live'

    if str(code) != '0':
        return
    data = content_json.get('data')
    if not data:
        return

    item = QianniuItem()
    item['content'] = data
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    yield item

    count = data.get('recordCount')
    if not count:
        return

    page_size = 20
    full_pages, remainder = divmod(int(count), page_size)
    total_pages = full_pages + (1 if remainder > 0 else 0)

    # Everything up to the page number is the same for every request.
    url_prefix = (
        'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/getItemListOffline.json?activityId='
        + str(meta.get('activityId'))
        + '&itemType=0&device=1&keyword=&dateType=day&dateRange='
        + str(meta.get('activity_time')) + '%7C' + str(meta.get('activity_time'))
        + '&pageSize=' + str(page_size) + '&page='
    )

    for page in range(2, total_pages + 1):
        url = url_prefix + str(page) + '&order=desc&orderBy=' + meta.get('orderby')
        yield scrapy.Request(url,
                             callback=self.parse_act_item_live,
                             headers=header,
                             cookies=meta.get('cookie_brand'),
                             meta={
                                 'url': url,
                                 'brand': meta.get('brand'),
                                 'cookie_brand': meta.get('cookie_brand'),
                                 'd': meta.get('d'),
                                 'activityId': meta.get('activityId'),
                                 'activity_time': meta.get('activity_time'),
                                 'orderby': meta.get('orderby')
                             },
                             dont_filter=True)
def parse_realtime_overview_payamt(self, response):
    """Yield one ``QianniuItem`` built from the ``data`` part of the response.

    The body is decoded as JSON and ``response.meta['cate']`` is set to
    ``'parse_realtime_overview_flow_payamt'``.  An item is produced only
    when the payload's ``code`` stringifies to ``'0'`` and ``data`` is
    truthy; it carries that data, today's local date (``%Y-%m-%d``) and the
    tagged meta.

    Args:
        response: object exposing ``body`` and ``meta``.

    Yields:
        At most one ``QianniuItem``.
    """
    payload = json.loads(response.body)
    status = payload.get('code')

    # The tag is written before the status check, so it is set even when
    # nothing ends up being yielded.
    request_meta = response.meta
    request_meta['cate'] = 'parse_realtime_overview_flow_payamt'

    if str(status) != '0':
        return

    data = payload.get('data')
    if not data:
        return

    record = QianniuItem()
    record['content'] = data
    record['dt'] = time.strftime('%Y-%m-%d', time.localtime())
    record['meta'] = request_meta
    yield record
def parse_item(self, response):
    """Yield the page's item and, from page 1, queue the remaining pages.

    The body is decoded as JSON once.  When ``data`` is truthy a
    ``QianniuItem`` is yielded with that data, the response meta and
    today's local date (``%Y-%m-%d``).  If the URL ends in ``&page=1``,
    ``data['recordCount']`` is divided into pages of 20 and a request is
    yielded for every page from 2 up to the last one.

    A URL without ``&page=`` yields only its item and does not fan out;
    previously it raised ``IndexError`` after the item was yielded.

    Args:
        response: object exposing ``body``, ``meta`` and ``url``; meta is
            read for ``cookie_brand``, ``month``, ``brand``, ``catename``
            and ``cateid``.

    Yields:
        A ``QianniuItem`` followed by zero or more ``Request`` objects.

    Raises:
        ValueError: if the body is not valid JSON.
    """
    meta = response.meta
    data = json.loads(response.body).get('data')
    if not data:
        # No payload: nothing to store and no record count to paginate on.
        return

    item = QianniuItem()
    item['content'] = data
    item['meta'] = meta
    item['dt'] = time.strftime('%Y-%m-%d', time.localtime(time.time()))
    yield item

    url_parts = response.url.split('&page=')
    # Only the first page fans out to the others.
    if len(url_parts) < 2 or url_parts[1] != '1':
        return

    record_count = data.get('recordCount')
    if not record_count:
        return

    page_size = 20
    full_pages, remainder = divmod(int(record_count), page_size)
    total_pages = full_pages + (1 if remainder > 0 else 0)

    for page in range(2, total_pages + 1):
        yield Request(url_parts[0] + '&page=%s' % page,
                      callback=self.parse_item,
                      headers=header,
                      cookies=meta.get('cookie_brand'),
                      meta={
                          'month': meta.get('month'),
                          'brand': meta.get('brand'),
                          'catename': meta.get('catename'),
                          'cateid': meta.get('cateid')
                      },
                      dont_filter=True)
def parse_data(self, response):
    """Yield the first-page item, then requests for live item-list pages 2 and 3.

    The body is decoded as JSON and ``response.meta['cate']`` is set to
    ``'parse_data'``.  When ``code`` stringifies to ``'0'`` and ``data`` is
    truthy, one ``QianniuItem`` is yielded (data, meta, today's local date)
    followed by two ``Request`` objects for pages 2 and 3, handled by
    ``self.parse_act_item``.

    Args:
        response: object exposing ``body`` and ``meta``; meta is read for
            ``activityId``, ``orderby``, ``brand``, ``cookie_brand`` and ``d``.

    Yields:
        A ``QianniuItem`` and two ``Request`` objects, or nothing.
    """
    payload = json.loads(response.body)
    status = payload.get('code')
    request_meta = response.meta
    request_meta['cate'] = 'parse_data'

    if str(status) != '0':
        return
    data = payload.get('data')
    if not data:
        return

    record = QianniuItem()
    record['content'] = data
    record['meta'] = request_meta
    record['dt'] = time.strftime('%Y-%m-%d', time.localtime())
    yield record

    # The page range is fixed at 2-3.  A computation based on recordCount
    # existed here only as disabled (commented-out) code.
    url_template = 'https://sycm.taobao.com/datawar/v3/activity/itemCoreIndex/getItemListLive.json?activityId=%s&itemType=0&device=1&keyword=&pageSize=20&page=%s&order=desc&orderBy=%s'
    forwarded_keys = ('brand', 'cookie_brand', 'd', 'activityId', 'orderby')

    for page in (2, 3):
        page_url = url_template % (
            str(request_meta.get('activityId')),
            str(page),
            request_meta.get('orderby'),
        )
        yield Request(
            page_url,
            callback=self.parse_act_item,
            headers=header,
            cookies=request_meta.get('cookie_brand'),
            meta={key: request_meta.get(key) for key in forwarded_keys},
            dont_filter=True,
        )