Exemplo n.º 1
0
    def parse(self, response):
        """Yield one detail-page request per entry of the result list.

        Every ``<li>`` under ``ul[@class='vT-srch-result-list-bid']`` is
        converted into a ``zhaotou_zhaobItem``.  The partially filled item is
        attached to the request's ``meta`` under the key ``'item'`` and the
        request is routed to ``self.parse_item``.

        The direct text of the entry's ``<span>`` is split on ``"|"``; the
        first three pieces populate ``crl_10416003``, ``crl_10416018`` and
        ``crl_10416022``.  Missing pieces are stored as empty strings instead
        of raising ``IndexError``.
        """
        sel = Selector(response)
        # Select each row once and query relative to it, rather than
        # re-evaluating an absolute, indexed XPath for every single field.
        rows = sel.xpath("//ul[@class='vT-srch-result-list-bid']/li")
        for row in rows:
            link = "".join(row.xpath("a/@href").extract())
            if not link:
                # Request() raises ValueError on an empty URL, which would
                # abort the generator and drop all remaining rows.
                continue
            # NOTE(review): the href is passed to Request unchanged, so it is
            # assumed to be absolute -- confirm against the target site.

            # Split the span text once and pad to three fields so a short
            # row does not raise IndexError.
            span_text = "".join(row.xpath("span/text()").extract())
            fields = span_text.split("|")
            fields += [""] * (3 - len(fields))

            item = zhaotou_zhaobItem()
            item['crl_30101001'] = link
            item['crl_10416003'] = fields[0]
            item['crl_10416018'] = fields[1]
            item['crl_10416022'] = fields[2]
            item['crl_10416002'] = "".join(
                row.xpath("span/a/text()").extract())
            item['crl_10416008'] = "".join(
                row.xpath("span/strong[last()]//text()").extract())

            yield Request(link,
                          callback=self.parse_item,
                          meta={'item': item})
Exemplo n.º 2
0
    def parse_item(self, response):
        """Build a ``zhaotou_zhaobItem`` from a detail page.

        The response URL and two fixed labels are stored as literal values;
        the remaining fields are extracted with XPath, and every extracted
        string is stripped of surrounding whitespace.
        """
        strip_each = MapCompose(unicode.strip)
        loader = ItemLoader(zhaotou_zhaobItem(), response)

        # Page URL and all text inside the "entry" container.
        loader.add_value("crl_30101001", response.url)
        loader.add_xpath(
            "crl_30101002", "//div[@class='entry']//text()", strip_each)

        # Fixed labels stored with every item from this spider.
        loader.add_value("crl_30101003", u"陕西采购与招标网陕西省招投标协会")
        loader.add_value("crl_30101004", u"公开招标")

        # Text of the <h1> heading and of the span carrying class "time".
        loader.add_xpath("crl_10416001", "//h1/text()", strip_each)
        loader.add_xpath(
            "crl_10416003", "//span[@class='time']/text()", strip_each)

        return loader.load_item()
Exemplo n.º 3
0
    def parse_item(self, response):
        """Build a ``zhaotou_zhaobItem`` from a detail page.

        The response URL and two fixed labels are stored as literal values;
        page content, project title and publication time are extracted with
        XPath, and every extracted string is stripped of surrounding
        whitespace.
        """
        loader = ItemLoader(zhaotou_zhaobItem(), response)
        # URL
        loader.add_value("crl_30101001", response.url)
        # Page content
        loader.add_xpath("crl_30101002", "//div[@class='wz']//text()",
                         MapCompose(unicode.strip))
        loader.add_value("crl_30101003", u"电梯采购网")
        loader.add_value("crl_30101004", u"公开招标")
        # Project title.  The expression previously read "///h2//text()",
        # which is not valid XPath 1.0 (a step cannot be empty); "//h2" is
        # the well-formed spelling of the intended query.
        loader.add_xpath("crl_10416001", "//h2//text()",
                         MapCompose(unicode.strip))
        # Publication time
        loader.add_xpath("crl_10416003", "//div[@class='zz']//text()",
                         MapCompose(unicode.strip))

        return loader.load_item()
Exemplo n.º 4
0
    def parse_item(self, response):
        """Build a ``zhaotou_zhaobItem`` from a detail page.

        The response URL and two fixed labels are stored as literal values;
        page content, project title and publication time are extracted with
        XPath, and every extracted string is stripped of surrounding
        whitespace.
        """
        strip_each = MapCompose(unicode.strip)
        loader = ItemLoader(zhaotou_zhaobItem(), response)

        # URL
        loader.add_value("crl_30101001", response.url)

        # Page content
        loader.add_xpath(
            "crl_30101002", "//div[@id='vsb_content']//text()", strip_each)

        # Fixed labels stored with every item from this spider.
        loader.add_value("crl_30101003", u"西安市财政局")
        loader.add_value("crl_30101004", u"公开招标")

        # Project title
        loader.add_xpath(
            "crl_10416001", "//td[@class='titlestyle3181']//text()",
            strip_each)

        # Publication time
        loader.add_xpath(
            "crl_10416003", "//span[@class='timestyle3181']//text()",
            strip_each)

        return loader.load_item()
Exemplo n.º 5
0
    def parse_item(self, response):
        """Build a ``zhaotou_zhaobItem`` from a detail page.

        The response URL and two fixed labels are stored as literal values;
        the remaining fields are extracted with XPath, and every extracted
        string is stripped of surrounding whitespace.
        """
        strip_each = MapCompose(unicode.strip)
        loader = ItemLoader(zhaotou_zhaobItem(), response)

        loader.add_value("crl_30101001", response.url)

        # Body text: union of both content container ids.
        content_xpath = (
            "//div[@id='vsb_content_2']//text() | "
            "//div[@id='vsb_content']//text()"
        )
        loader.add_xpath("crl_30101002", content_xpath, strip_each)

        # Fixed labels stored with every item from this spider.
        loader.add_value("crl_30101003", u"西安高新技术产业开发区")
        loader.add_value("crl_30101004", u"公开招标")

        # Text of the first cell of the nested layout table.
        title_xpath = "//tbody/tr[1]/td/table/tbody/tr[1]/td//text()"
        loader.add_xpath("crl_10416001", title_xpath, strip_each)

        # Text of the first match for an 11th nowrap <td>.
        time_xpath = "(//td[@nowrap='nowrap'][11])[1]//text()"
        loader.add_xpath("crl_10416003", time_xpath, strip_each)

        return loader.load_item()