Пример #1
0
 def parse_page_content(self, response):
     """Build a ``CqspiderItem`` from the bid details on a page.

     The last node matching ``//tr[4]/td[2]//div[1]`` is taken, every
     markup tag in it is replaced by a single space, and the resulting
     text is handed to ``FilterForHC`` to pull out the individual fields.

     Args:
         response: Object exposing ``xpath(...).extract()`` (presumably a
             Scrapy response -- confirm against the caller).

     Returns:
         The populated ``CqspiderItem``, or ``None`` when the XPath
         matches nothing on the page.
     """
     matches = response.xpath('//tr[4]/td[2]//div[1]').extract()
     if not matches:
         # Without this guard an unmatched page raised IndexError from
         # ``pop()`` on an empty list; skip such pages instead.
         return None

     # Keep the last match, exactly as the former ``pop()`` did.
     page_content = re.sub('<[^>]+>', ' ', matches[-1])
     fil = FilterForHC(page_content)

     item = CqspiderItem()
     item['project_name'] = fil.get_project_name()
     item['bid_name'] = fil.get_bid_name()
     item['bid_time'] = fil.get_bid_time()
     item['bid_money'] = fil.get_bid_money()
     return item
Пример #2
0
    def parse_page_content(self, response):
        """Build a ``CqspiderItem`` from the element with id ``textflag``.

        The last node matching ``//*[@id="textflag"]`` is taken, every
        markup tag in it is removed outright (no replacement character),
        and the resulting text is handed to ``FilterForJJ`` to pull out
        the individual fields.

        Args:
            response: Object exposing ``xpath(...).extract()`` (presumably
                a Scrapy response -- confirm against the caller).

        Returns:
            The populated ``CqspiderItem``, or ``None`` when the page has
            no matching element.
        """
        matches = response.xpath('//*[@id="textflag"]').extract()
        if not matches:
            # Without this guard an unmatched page raised IndexError from
            # ``pop()`` on an empty list; skip such pages instead.
            return None

        # Keep the last match, exactly as the former ``pop()`` did.
        content = re.sub('<[^>]*>', '', matches[-1])
        fil = FilterForJJ(content)

        item = CqspiderItem()
        item['project_name'] = fil.get_project_name()
        item['bid_name'] = fil.get_bid_name()
        item['bid_time'] = fil.get_bid_time()
        item['bid_money'] = fil.get_bid_money()
        return item
Пример #3
0
 def parse_page_content(self, response):
     """Build a ``CqspiderItem`` from the ``bulletinContent`` table body.

     The last node matching ``//*[@id="bulletinContent"]/tbody`` is taken,
     every markup tag in it is replaced by a single space, and the
     resulting text is handed to ``FilterForCQS`` to pull out the
     individual fields.

     Args:
         response: Object exposing ``xpath(...).extract()`` (presumably a
             Scrapy response -- confirm against the caller).

     Returns:
         The populated ``CqspiderItem``, or ``None`` when the XPath
         matches nothing on the page.
     """
     matches = response.xpath('//*[@id="bulletinContent"]/tbody').extract()
     if not matches:
         # Without this guard an unmatched page raised IndexError from
         # ``pop()`` on an empty list; skip such pages instead.
         return None

     # Keep the last match, exactly as the former ``pop()`` did.
     page_content = re.sub('<[^>]+>', ' ', matches[-1])
     fil = FilterForCQS(page_content)

     item = CqspiderItem()
     item['project_name'] = fil.get_project_name()
     item['bid_name'] = fil.get_bid_name()
     item['bid_time'] = fil.get_bid_time()
     item['bid_money'] = fil.get_bid_money()
     return item
Пример #4
0
 def parse_page_content(self, response):
     """Build a ``CqspiderItem`` from the element with id ``tblInfo``.

     The last node matching ``//*[@id="tblInfo"]`` has every markup tag
     replaced by a single space; the resulting text is handed to
     ``FilterForBB``, whose getters fill the item's fields.

     Args:
         response: Object exposing ``xpath(...).extract()`` (presumably a
             Scrapy response -- confirm against the caller).

     Returns:
         The populated ``CqspiderItem``, or ``None`` when the page has no
         matching element.
     """
     item = CqspiderItem()
     matches = response.xpath('//*[@id="tblInfo"]').extract()
     if not matches:
         # No matching element on this page: nothing to emit.
         return None

     # The last match is the one that gets parsed.
     plain_text = re.sub('<[^>]+>', ' ', matches[-1])
     extractor = FilterForBB(plain_text)

     item['project_name'] = extractor.get_project_name()
     item['bid_name'] = extractor.get_bid_name()
     item['bid_time'] = extractor.get_bid_time()
     item['bid_money'] = extractor.get_bid_money()
     return item