def parse(self, response):
    """Collect rating-item URLs from the rating table in ``response``.

    Each ``<tr>`` under ``table#rateTable_body/tbody`` yields one record:
    ``self.url_prefix`` concatenated with whatever ``get_content`` returns
    for the ``href`` values of the row's ``a.pname`` link.

    Returns the ``ExporterItem`` that the records were added to.
    """
    self.logger.info('Parsing Wangjia Rating Item URLs From <%s>.' % response.url)

    item = ExporterItem()
    for row in response.xpath('//table[@id="rateTable_body"]/tbody/tr'):
        hrefs = row.xpath('td/a[@class="pname"]/@href').extract()
        item.set_record(self.url_prefix + get_content(hrefs))

    return item
def parse(self, response):
    """Gather the rating-item links listed in the ``rateTable_body`` table.

    For every table row, the ``href`` of the ``a.pname`` anchor is passed
    through ``get_content``, prefixed with ``self.url_prefix`` and stored
    as a record on a new ``ExporterItem``, which is returned.
    """
    self.logger.info('Parsing Wangjia Rating Item URLs From <%s>.' % response.url)

    exported = ExporterItem()
    rows = response.xpath('//table[@id="rateTable_body"]/tbody/tr')
    for row in rows:
        anchor_hrefs = row.xpath('td/a[@class="pname"]/@href')
        relative_url = get_content(anchor_hrefs.extract())
        exported.set_record(self.url_prefix + relative_url)

    return exported
def parse(self, response):
    """Collect news URLs for ``self.category`` from the ``zllist`` listing.

    A URL is recorded only when it contains ``self.category`` and the
    value ``get_thread_from_news_url`` returns for it is, as an integer,
    greater than ``int(self.max_thread)``.

    Returns the ``ExporterItem`` holding the recorded URLs.
    """
    self.logger.info('Parsing Wangjia News %s URLs From <%s>.'
                     % (self.category, response.url))

    item = ExporterItem()
    for entry in response.xpath('//ul[@class="zllist"]/li'):
        link = get_content(entry.xpath('div[2]/h3/a/@href').extract())
        # Only links that mention the category are considered at all.
        if link.find(self.category) != -1:
            thread_id = get_thread_from_news_url(link)
            if int(self.max_thread) < int(thread_id):
                item.set_record(link)

    return item
def parse(self, response):
    """Collect news URLs for ``self.category`` from ``specialBox`` titles.

    Looks at every ``div.news_title`` inside a ``div`` whose class contains
    ``specialBox``. A link is recorded when it contains ``self.category``
    and the integer value of ``get_thread_from_news_url(link)`` exceeds
    ``int(self.max_thread)``.

    Returns the ``ExporterItem`` holding the recorded URLs.
    """
    self.logger.info('Parsing Wangjia News %s URLs From <%s>.'
                     % (self.category, response.url))

    item = ExporterItem()
    title_nodes = response.xpath(
        '//div[contains(@class, "specialBox")]//div[@class="news_title"]')
    for title in title_nodes:
        link = get_content(title.xpath('a/@href').extract())
        in_category = link.find(self.category) != -1
        if in_category:
            thread_id = get_thread_from_news_url(link)
            if int(self.max_thread) < int(thread_id):
                item.set_record(link)

    return item
def parse(self, response):
    """Collect exposure URLs from the ``detail-ul-list`` listing.

    For each ``<li>`` in the list, the link under ``./div/a`` is passed
    through ``get_content``; it is recorded when the integer value of
    ``get_thread_from_exposure_url(link)`` exceeds ``int(self.max_thread)``.

    Returns the ``ExporterItem`` holding the recorded URLs.
    """
    self.logger.info('Parsing Wangjia Exporsure URLs From <%s>.' % response.url)

    item = ExporterItem()
    for entry in response.xpath('//div[@class="detail-ul-list"]/div/ul/li'):
        hrefs = entry.xpath('./div/a/@href').extract()
        link = get_content(hrefs)
        thread_id = get_thread_from_exposure_url(link)
        if int(self.max_thread) < int(thread_id):
            item.set_record(link)

    return item
def parse(self, response):
    """Read a platform's page count from a JSON response.

    The body must decode as JSON and carry ``result_code`` equal to 1
    (after ``int`` conversion). Any exception raised while decoding or
    checking is logged as a warning and ``None`` is returned.

    On success, returns an ``ExporterItem`` with a single record: the
    payload's ``page_count`` (0 when the key is absent).
    """
    symbol = (self.plat_id, get_url_host(response.url), response.url)
    self.logger.info('Parsing No.%s [%s] Plat Page Count From <%s>.' % symbol)

    item = ExporterItem()
    try:
        payload = json.loads(response.body_as_unicode())
        # NOTE(review): canned payload left commented out in the original:
        #content = {'result': '1', 'data': {'token': 'yamiedie'}}
        succeeded = int(payload.get('result_code', 0)) == 1
        if not succeeded:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s [%s] Plat Page Count From <%s>.' % symbol)
        return None

    item.set_record(payload.get('page_count', 0))
    return item
def parse(self, response):
    """Read a channel's total page count from a JSON API response.

    The body must decode as JSON with ``showapi_res_code`` equal to 0, a
    non-empty ``showapi_res_body`` and a ``ret_code`` of 0 inside that
    body. Any exception raised while decoding or checking is logged as a
    warning and ``None`` is returned.

    On success, returns an ``ExporterItem`` with a single record:
    ``showapi_res_body.pagebean.allPages`` (0 when absent).
    """
    symbol = (self.channel_id, response.url)
    self.logger.info('Parsing [%s] Channel Count From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        internal_content = content.get('showapi_res_body', {})
        if int(content.get('showapi_res_code', -1)) != 0 or not internal_content or \
                int(internal_content.get('ret_code', -1)) != 0:
            raise ValueError
    except Exception:
        # ``symbol`` holds two values, so the message takes exactly two
        # placeholders; a three-placeholder message raises TypeError here
        # and the failure would escape instead of returning None.
        self.logger.warning('Fail To Receive [%s] Channel Count From <%s>.' % symbol)
        return None

    # The item is only needed once the payload has been validated.
    item = ExporterItem()
    item.set_record(internal_content.get('pagebean', {}).get('allPages', 0))
    return item
def parse(self, response):
    """Collect exposure thread URLs from the forum listing tables.

    Walks every ``<tbody>`` of tables whose ``summary`` attribute starts
    with ``forum``. Bodies without a ``tr/th.new`` cell are skipped. For
    the rest, the ``href`` of the ``a`` whose class contains ``xst`` is
    passed through ``get_content`` and recorded when the integer value of
    ``get_thread_from_exposure_url(link)`` exceeds ``int(self.max_thread)``.

    Returns the ``ExporterItem`` holding the recorded URLs.
    """
    self.logger.info('Parsing Wangjia Exporsure URLs From <%s>.' % response.url)

    item = ExporterItem()
    # NOTE(review): alternative selectors left commented out in the original:
    #elements = response.xpath('//div[@class="comeing_channel_tab_area"]/table/tbody')
    #content = ele.xpath('tr/td[@class="comeing_channel_threadlist_sub"]')
    for tbody in response.xpath('//table[starts-with(@summary, "forum")]/tbody'):
        heading = tbody.xpath('tr/th[@class="new"]')
        if heading:
            hrefs = heading.xpath('a[contains(@class, "xst")]/@href').extract()
            link = get_content(hrefs)
            thread_id = get_thread_from_exposure_url(link)
            if int(self.max_thread) < int(thread_id):
                item.set_record(link)

    return item
def parse(self, response):
    """Extract login token and cookies from a platform's JSON login reply.

    The body must decode as JSON and carry ``result`` equal to 1 (after
    ``int`` conversion). Any exception raised while decoding or checking
    is logged as a warning and ``None`` is returned.

    On success, returns an ``ExporterItem`` with two records, in order:
    ``data.token`` from the payload, and the JSON-encoded list of the
    response's ``Set-Cookie`` header values.
    """
    symbol = (self.plat_id, get_url_host(response.url), response.url)
    self.logger.info('Parsing No.%s [%s] Plat Login Info From <%s>.' % symbol)

    try:
        payload = json.loads(response.body_as_unicode())
        # NOTE(review): canned payload left commented out in the original:
        #content = {'result': '1', 'data': {'token': 'yamiedie'}}
        login_ok = int(payload.get('result', 0)) == 1
        if not login_ok:
            raise ValueError
    except Exception:
        self.logger.warning('Fail To Receive No.%s [%s] Plat Login Info From <%s>.' % symbol)
        return None

    item = ExporterItem()
    token = payload.get('data', {}).get('token')
    item.set_record(token)
    cookies = response.headers.getlist('Set-Cookie')
    item.set_record(json.dumps(cookies))
    return item
def parse(self, response):
    """Return the page count reported by a platform's JSON response.

    Decodes the body as JSON and requires ``int(result_code) == 1``. If
    decoding or the check raises, a warning is logged and ``None`` is
    returned. Otherwise the payload's ``page_count`` (default 0) is stored
    as the only record of the returned ``ExporterItem``.
    """
    symbol = (self.plat_id, get_url_host(response.url), response.url)
    self.logger.info('Parsing No.%s [%s] Plat Page Count From <%s>.' % symbol)

    item = ExporterItem()
    try:
        data = json.loads(response.body_as_unicode())
        # NOTE(review): canned payload left commented out in the original:
        #content = {'result': '1', 'data': {'token': 'yamiedie'}}
        if int(data.get('result_code', 0)) != 1:
            raise ValueError
    except Exception:
        failure = 'Fail To Receive No.%s [%s] Plat Page Count From <%s>.' % symbol
        self.logger.warning(failure)
        return None
    else:
        page_count = data.get('page_count', 0)
        item.set_record(page_count)
        return item
def parse(self, response):
    """Read a channel's total page count from a JSON API response.

    The body must decode as JSON with ``showapi_res_code`` equal to 0, a
    non-empty ``showapi_res_body`` and a ``ret_code`` of 0 inside that
    body. Any exception raised while decoding or checking is logged as a
    warning and ``None`` is returned.

    On success, returns an ``ExporterItem`` with a single record:
    ``showapi_res_body.pagebean.allPages`` (0 when absent).
    """
    symbol = (self.channel_id, response.url)
    self.logger.info('Parsing [%s] Channel Count From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        internal_content = content.get('showapi_res_body', {})
        if int(content.get('showapi_res_code', -1)) != 0 or not internal_content or \
                int(internal_content.get('ret_code', -1)) != 0:
            raise ValueError
    except Exception:
        # ``symbol`` holds two values, so the message takes exactly two
        # placeholders; a three-placeholder message raises TypeError here
        # and the failure would escape instead of returning None.
        self.logger.warning(
            'Fail To Receive [%s] Channel Count From <%s>.' % symbol)
        return None

    # The item is only needed once the payload has been validated.
    item = ExporterItem()
    item.set_record(
        internal_content.get('pagebean', {}).get('allPages', 0))
    return item
def parse(self, response):
    """Extract the login token from a platform's JSON login response.

    The body must decode as JSON and carry ``result`` equal to 1 (after
    ``int`` conversion). Any exception raised while decoding or checking
    is logged as a warning and ``None`` is returned.

    On success, returns an ``ExporterItem`` whose single record is
    ``data.token`` from the payload (``None`` when absent).
    """
    symbol = (self.plat_id, get_url_host(response.url), response.url)
    self.logger.info('Parsing No.%s [%s] Plat Login Info From <%s>.' % symbol)

    try:
        content = json.loads(response.body_as_unicode())
        # NOTE(review): this logs the whole login payload, token included;
        # consider redacting or lowering the level.
        self.logger.info(content)
        if int(content.get('result', 0)) != 1:
            raise ValueError
    except Exception:
        self.logger.warning(
            'Fail To Receive No.%s [%s] Plat Login Info From <%s>.' % symbol)
        return None

    item = ExporterItem()
    # The token is handed over through the item only; it is deliberately
    # not echoed to stdout.
    item.set_record(content.get('data', {}).get('token'))
    return item
def parse(self, response):
    """Gather exposure thread links from tables summarised as ``forum…``.

    Each ``<tbody>`` of a matching table is inspected; those lacking a
    ``tr/th.new`` cell are ignored. From the remaining ones the ``href``
    of the ``a`` whose class contains ``xst`` goes through ``get_content``
    and is recorded if ``int(get_thread_from_exposure_url(link))`` is
    greater than ``int(self.max_thread)``.

    Returns the ``ExporterItem`` holding the recorded URLs.
    """
    self.logger.info('Parsing Wangjia Exporsure URLs From <%s>.' % response.url)

    item = ExporterItem()
    bodies = response.xpath('//table[starts-with(@summary, "forum")]/tbody')
    # NOTE(review): alternative selectors left commented out in the original:
    #elements = response.xpath('//div[@class="comeing_channel_tab_area"]/table/tbody')
    #content = ele.xpath('tr/td[@class="comeing_channel_threadlist_sub"]')
    for body in bodies:
        new_cell = body.xpath('tr/th[@class="new"]')
        if new_cell:
            anchor_hrefs = new_cell.xpath('a[contains(@class, "xst")]/@href')
            link = get_content(anchor_hrefs.extract())
            thread_id = get_thread_from_exposure_url(link)
            if int(self.max_thread) < int(thread_id):
                item.set_record(link)

    return item
def parse(self, response):
    """Build the login record (token plus cookies) from a JSON login reply.

    Decodes the body as JSON and requires ``int(result) == 1``. If decoding
    or the check raises, a warning is logged and ``None`` is returned.
    Otherwise an ``ExporterItem`` is returned with two records, in order:
    the payload's ``data.token`` and the JSON dump of the response's
    ``Set-Cookie`` header values.
    """
    symbol = (self.plat_id, get_url_host(response.url), response.url)
    self.logger.info('Parsing No.%s [%s] Plat Login Info From <%s>.' % symbol)

    try:
        reply = json.loads(response.body_as_unicode())
        # NOTE(review): canned payload left commented out in the original:
        #content = {'result': '1', 'data': {'token': 'yamiedie'}}
        if int(reply.get('result', 0)) != 1:
            raise ValueError
    except Exception:
        failure = 'Fail To Receive No.%s [%s] Plat Login Info From <%s>.' % symbol
        self.logger.warning(failure)
        return None
    else:
        item = ExporterItem()
        item.set_record(reply.get('data', {}).get('token'))
        item.set_record(json.dumps(response.headers.getlist('Set-Cookie')))
        return item