Exemplo n.º 1
0
    def parse(self, response):
        """Collect the rating item links listed in the page's rate table.

        Every ``<tr>`` under ``table#rateTable_body`` contributes one record:
        ``self.url_prefix`` joined with whatever ``get_content`` returns for
        the ``href`` values of the row's ``a.pname`` anchor.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received one ``set_record`` call per row.
        """
        page_url = response.url
        self.logger.info('Parsing Wangjia Rating Item URLs From <%s>.' % page_url)

        exported = ExporterItem()
        for row in response.xpath('//table[@id="rateTable_body"]/tbody/tr'):
            hrefs = row.xpath('td/a[@class="pname"]/@href').extract()
            exported.set_record(self.url_prefix + get_content(hrefs))

        return exported
Exemplo n.º 2
0
    def parse(self, response):
        """Record a prefixed URL for each row of the rating table.

        The rows are selected with ``//table[@id="rateTable_body"]/tbody/tr``.
        For each one, the ``href`` values of the ``a.pname`` anchor are passed
        through ``get_content`` and appended to ``self.url_prefix``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem holding one record per table row.
        """
        message = 'Parsing Wangjia Rating Item URLs From <%s>.' % response.url
        self.logger.info(message)

        result = ExporterItem()
        table_rows = response.xpath('//table[@id="rateTable_body"]/tbody/tr')
        for table_row in table_rows:
            link_selector = table_row.xpath('td/a[@class="pname"]/@href')
            relative_link = get_content(link_selector.extract())
            result.set_record(self.url_prefix + relative_link)

        return result
Exemplo n.º 3
0
    def parse(self, response):
        """Collect news URLs for ``self.category`` from the ``zllist`` list.

        A link is recorded only when it contains ``self.category`` and the
        value returned by ``get_thread_from_news_url`` is, as an integer,
        greater than ``self.max_thread``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received the accepted URLs.
        """
        log_args = (self.category, response.url)
        self.logger.info('Parsing Wangjia News %s URLs From <%s>.' % log_args)

        collected = ExporterItem()
        for entry in response.xpath('//ul[@class="zllist"]/li'):
            link = get_content(entry.xpath('div[2]/h3/a/@href').extract())
            # Links that do not mention the configured category are ignored.
            if link.find(self.category) < 0:
                continue

            thread_id = get_thread_from_news_url(link)
            # Keep only links whose thread value exceeds the stored maximum.
            if int(self.max_thread) >= int(thread_id):
                continue
            collected.set_record(link)

        return collected
Exemplo n.º 4
0
    def parse(self, response):
        """Collect news URLs for ``self.category`` from the ``specialBox`` blocks.

        Each ``div.news_title`` inside a ``div`` whose class contains
        ``specialBox`` supplies one candidate link. The link is recorded when
        it contains ``self.category`` and its thread value (from
        ``get_thread_from_news_url``) is, as an integer, greater than
        ``self.max_thread``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received the accepted URLs.
        """
        self.logger.info('Parsing Wangjia News %s URLs From <%s>.' % (self.category, response.url))

        exported = ExporterItem()
        title_nodes = response.xpath('//div[contains(@class, "specialBox")]//div[@class="news_title"]')
        for title_node in title_nodes:
            news_url = get_content(title_node.xpath('a/@href').extract())
            category_found = news_url.find(self.category) != -1
            if not category_found:
                continue

            news_thread = get_thread_from_news_url(news_url)
            known_max = int(self.max_thread)
            if known_max < int(news_thread):
                exported.set_record(news_url)

        return exported
Exemplo n.º 5
0
    def parse(self, response):
        """Collect exposure URLs from the ``detail-ul-list`` list.

        For every ``<li>`` of the list, the anchor ``href`` is resolved with
        ``get_content`` and recorded when the value returned by
        ``get_thread_from_exposure_url`` is, as an integer, greater than
        ``self.max_thread``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received the accepted URLs.
        """
        message = 'Parsing Wangjia Exporsure URLs From <%s>.' % response.url
        self.logger.info(message)

        collected = ExporterItem()

        for entry in response.xpath('//div[@class="detail-ul-list"]/div/ul/li'):
            link = get_content(entry.xpath('./div/a/@href').extract())
            thread_id = get_thread_from_exposure_url(link)

            # Skip links whose thread value does not exceed the stored maximum.
            if int(self.max_thread) >= int(thread_id):
                continue
            collected.set_record(link)
        return collected
Exemplo n.º 6
0
    def parse(self, response):
        """Extract the page count from a platform's JSON response.

        The body is decoded as JSON and accepted only when its
        ``result_code`` field converts to the integer 1.

        Args:
            response: The downloaded response; must expose ``url`` and
                ``body_as_unicode``.

        Returns:
            An ExporterItem carrying the payload's ``page_count`` (0 when the
            key is absent), or None when decoding or validation fails.
        """
        log_args = (self.plat_id, get_url_host(response.url), response.url)
        self.logger.info('Parsing No.%s [%s] Plat Page Count From <%s>.' % log_args)

        result = ExporterItem()
        try:
            payload = json.loads(response.body_as_unicode())
            succeeded = int(payload.get('result_code', 0)) == 1
            if not succeeded:
                raise ValueError
        except Exception:
            # Any decoding or validation problem is reported, not propagated.
            self.logger.warning('Fail To Receive No.%s [%s] Plat Page Count From <%s>.' % log_args)
            return None

        page_count = payload.get('page_count', 0)
        result.set_record(page_count)
        return result
    def parse(self, response):
        """Extract the total page count for a channel from a JSON response.

        The payload is accepted only when ``showapi_res_code`` is 0, a
        non-empty ``showapi_res_body`` is present, and that body's
        ``ret_code`` is 0.

        Args:
            response: The downloaded response; must expose ``url`` and
                ``body_as_unicode``.

        Returns:
            An ExporterItem carrying ``showapi_res_body.pagebean.allPages``
            (0 when absent), or None when decoding or validation fails.
        """
        symbol = (self.channel_id, response.url)
        self.logger.info('Parsing [%s] Channel Count From <%s>.' % symbol)

        try:
            content = json.loads(response.body_as_unicode())
            internal_content = content.get('showapi_res_body', {})
            api_ok = int(content.get('showapi_res_code', -1)) == 0
            if not api_ok or not internal_content or \
               int(internal_content.get('ret_code', -1)) != 0:
                raise ValueError
        except Exception:
            # The message must have exactly as many placeholders as `symbol`
            # has entries; the former three-placeholder text raised TypeError
            # here and turned every failed fetch into a crash.
            self.logger.warning('Fail To Receive [%s] Channel Count From <%s>.' % symbol)
            return None

        # The item is only needed once the payload has been validated.
        item = ExporterItem()
        item.set_record(internal_content.get('pagebean', {}).get('allPages', 0))
        return item
Exemplo n.º 8
0
    def parse(self, response):
        """Collect exposure thread URLs from the forum tables of the page.

        Each ``<tbody>`` of a table whose ``summary`` starts with ``forum`` is
        inspected; bodies without a ``th.new`` cell are skipped. The ``href``
        of the cell's anchor whose class contains ``xst`` is recorded when the
        value from ``get_thread_from_exposure_url`` is, as an integer, greater
        than ``self.max_thread``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received the accepted URLs.
        """
        page_url = response.url
        self.logger.info('Parsing Wangjia Exporsure URLs From <%s>.' % page_url)

        collected = ExporterItem()
        # Previously tried selector:
        #   //div[@class="comeing_channel_tab_area"]/table/tbody
        for body in response.xpath('//table[starts-with(@summary, "forum")]/tbody'):
            # Previously tried selector:
            #   tr/td[@class="comeing_channel_threadlist_sub"]
            heading = body.xpath('tr/th[@class="new"]')
            if not heading:
                continue

            hrefs = heading.xpath('a[contains(@class, "xst")]/@href').extract()
            link = get_content(hrefs)
            thread_id = get_thread_from_exposure_url(link)
            if int(self.max_thread) >= int(thread_id):
                continue
            collected.set_record(link)

        return collected
Exemplo n.º 9
0
    def parse(self, response):
        """Extract the login token and cookies from a platform's JSON response.

        The body is decoded as JSON and accepted only when its ``result``
        field converts to the integer 1.

        Args:
            response: The downloaded response; must expose ``url``,
                ``headers`` and ``body_as_unicode``.

        Returns:
            An ExporterItem with two records, ``data.token`` from the payload
            followed by the JSON-encoded list of ``Set-Cookie`` headers, or
            None when decoding or validation fails.
        """
        log_args = (self.plat_id, get_url_host(response.url), response.url)
        self.logger.info('Parsing No.%s [%s] Plat Login Info From <%s>.' % log_args)

        try:
            # Payload shape from an earlier debugging stub:
            #   {'result': '1', 'data': {'token': '...'}}
            payload = json.loads(response.body_as_unicode())
            logged_in = int(payload.get('result', 0)) == 1
            if not logged_in:
                raise ValueError
        except Exception:
            self.logger.warning('Fail To Receive No.%s [%s] Plat Login Info From <%s>.' % log_args)
            return None

        token = payload.get('data', {}).get('token')
        cookies = response.headers.getlist('Set-Cookie')

        result = ExporterItem()
        result.set_record(token)
        result.set_record(json.dumps(cookies))
        return result
Exemplo n.º 10
0
    def parse(self, response):
        """Read the ``page_count`` field out of a platform's JSON response.

        A payload counts as valid only when ``int(result_code)`` equals 1.

        Args:
            response: The downloaded response; must expose ``url`` and
                ``body_as_unicode``.

        Returns:
            An ExporterItem holding ``page_count`` (0 when the key is
            missing), or None when the body cannot be decoded or validated.
        """
        context = (self.plat_id, get_url_host(response.url), response.url)
        start_message = 'Parsing No.%s [%s] Plat Page Count From <%s>.' % context
        self.logger.info(start_message)

        exported = ExporterItem()
        try:
            decoded = json.loads(response.body_as_unicode())
            result_code = int(decoded.get('result_code', 0))
            if result_code != 1:
                raise ValueError
        except Exception:
            # Failures are logged and signalled with None rather than raised.
            failure_message = 'Fail To Receive No.%s [%s] Plat Page Count From <%s>.' % context
            self.logger.warning(failure_message)
            return None

        exported.set_record(decoded.get('page_count', 0))
        return exported
Exemplo n.º 11
0
    def parse(self, response):
        """Extract the total page count for a channel from a JSON response.

        The payload is accepted only when ``showapi_res_code`` is 0, a
        non-empty ``showapi_res_body`` is present, and that body's
        ``ret_code`` is 0.

        Args:
            response: The downloaded response; must expose ``url`` and
                ``body_as_unicode``.

        Returns:
            An ExporterItem carrying ``showapi_res_body.pagebean.allPages``
            (0 when absent), or None when decoding or validation fails.
        """
        symbol = (self.channel_id, response.url)
        self.logger.info('Parsing [%s] Channel Count From <%s>.' % symbol)

        try:
            content = json.loads(response.body_as_unicode())
            internal_content = content.get('showapi_res_body', {})
            api_ok = int(content.get('showapi_res_code', -1)) == 0
            if not api_ok or not internal_content or \
               int(internal_content.get('ret_code', -1)) != 0:
                raise ValueError
        except Exception:
            # `symbol` has two entries, so the message needs exactly two
            # placeholders; the former three-placeholder text raised
            # TypeError inside this handler instead of returning None.
            self.logger.warning(
                'Fail To Receive [%s] Channel Count From <%s>.' % symbol)
            return None

        # The item is only needed once the payload has been validated.
        item = ExporterItem()
        item.set_record(
            internal_content.get('pagebean', {}).get('allPages', 0))
        return item
Exemplo n.º 12
0
    def parse(self, response):
        """Extract the login token from a platform's JSON response.

        The body is decoded as JSON, logged, and accepted only when its
        ``result`` field converts to the integer 1.

        Args:
            response: The downloaded response; must expose ``url`` and
                ``body_as_unicode``.

        Returns:
            An ExporterItem carrying ``data.token`` from the payload, or None
            when decoding or validation fails.
        """
        symbol = (self.plat_id, get_url_host(response.url), response.url)
        self.logger.info('Parsing No.%s [%s] Plat Login Info From <%s>.' %
                         symbol)

        try:
            content = json.loads(response.body_as_unicode())
            self.logger.info(content)
            if int(content.get('result', 0)) != 1:
                raise ValueError
        except Exception:
            self.logger.warning(
                'Fail To Receive No.%s [%s] Plat Login Info From <%s>.' %
                symbol)
            return None

        token = content.get('data', {}).get('token')
        # The token used to be written with a Python 2 `print` statement,
        # which polluted stdout and is a SyntaxError on Python 3; it now goes
        # through the spider logger at debug level.
        self.logger.debug('Received login token: %s', token)

        item = ExporterItem()
        item.set_record(token)
        return item
Exemplo n.º 13
0
    def parse(self, response):
        """Gather exposure thread URLs from the page's forum tables.

        The candidates are the ``<tbody>`` elements of tables whose
        ``summary`` attribute starts with ``forum``. A body is used only when
        it has a ``th.new`` cell; the ``href`` of that cell's anchor (class
        containing ``xst``) is recorded when ``get_thread_from_exposure_url``
        yields a value that is, as an integer, greater than
        ``self.max_thread``.

        Args:
            response: The downloaded page; must expose ``url`` and ``xpath``.

        Returns:
            The ExporterItem that received the accepted URLs.
        """
        self.logger.info(
            'Parsing Wangjia Exporsure URLs From <%s>.' % response.url)

        exported = ExporterItem()
        # Previously tried selector:
        #   //div[@class="comeing_channel_tab_area"]/table/tbody
        forum_bodies = response.xpath(
            '//table[starts-with(@summary, "forum")]/tbody')
        for forum_body in forum_bodies:
            # Previously tried selector:
            #   tr/td[@class="comeing_channel_threadlist_sub"]
            new_cell = forum_body.xpath('tr/th[@class="new"]')
            if not new_cell:
                continue

            anchor_hrefs = new_cell.xpath('a[contains(@class, "xst")]/@href')
            thread_url = get_content(anchor_hrefs.extract())
            thread_id = get_thread_from_exposure_url(thread_url)
            known_max = int(self.max_thread)
            if known_max < int(thread_id):
                exported.set_record(thread_url)

        return exported
Exemplo n.º 14
0
    def parse(self, response):
        """Pull the login token and response cookies out of a JSON response.

        A payload counts as valid only when ``int(result)`` equals 1.

        Args:
            response: The downloaded response; must expose ``url``,
                ``headers`` and ``body_as_unicode``.

        Returns:
            An ExporterItem whose first record is ``data.token`` and whose
            second record is the JSON-encoded ``Set-Cookie`` header list, or
            None when the body cannot be decoded or validated.
        """
        context = (self.plat_id, get_url_host(response.url), response.url)
        start_message = 'Parsing No.%s [%s] Plat Login Info From <%s>.' % context
        self.logger.info(start_message)

        try:
            # Payload shape from an earlier debugging stub:
            #   {'result': '1', 'data': {'token': '...'}}
            decoded = json.loads(response.body_as_unicode())
            result_flag = int(decoded.get('result', 0))
            if result_flag != 1:
                raise ValueError
        except Exception:
            failure_message = 'Fail To Receive No.%s [%s] Plat Login Info From <%s>.' % context
            self.logger.warning(failure_message)
            return None

        exported = ExporterItem()
        data_section = decoded.get('data', {})
        exported.set_record(data_section.get('token'))
        cookie_headers = response.headers.getlist('Set-Cookie')
        exported.set_record(json.dumps(cookie_headers))
        return exported