示例#1
0
文件: 17173.py 项目: leearic/irole
    def Get_Imag_URL(self, response):
        """Look up the real image address and the title of an article page.

        Every image URL matched on the page is printed together with the page
        URL.  An item is filled in along the way, but nothing is returned:
        the method always evaluates to ``None``.

        :param response: page response; ``response.url`` is passed to
            ``Spider_Django.django_sql().is_crawled`` before any parsing.
        """
        crawl_db = Spider_Django.django_sql()
        # NOTE(review): parsing runs when ``is_crawled`` is truthy and the
        # "is crawled" message is printed when it is falsy, which looks
        # inverted relative to the helper's name -- confirm its contract.
        if not crawl_db.is_crawled(response.url):
            print(response.url + " is crawled !")
            return

        selector = Selector(response)
        image_urls = selector.xpath('//p[@align="center"]/a/img/@src').extract()
        titles = selector.xpath('//h1[@class="gb-final-tit-article"]/text()').extract()

        collected = []
        item = Cos8Item()
        for image_url in image_urls:
            # The src attribute is used as-is; no base URL is prepended here.
            item["img_base_url"] = [image_url]
            item["html_base_url"] = response.url

            print("=====================")
            print(image_url)
            print(response.url)

        for title in titles:
            item["img_content"] = [title]
            collected.append(item)
        # NOTE(review): ``collected`` is built but never returned, so callers
        # receive None from this method -- confirm whether that is intended.
示例#2
0
文件: cos8.py 项目: leearic/irole
    def Get_Imag_URL(self, response):
        """Build items holding the real image address and title of a page.

        The image path is read from ``//img[@id="bigimg"]/@src`` and prefixed
        with ``settings.base_url``; the titles come from the ``h1`` inside the
        ``p_box hauto tcenter`` div.  One item is produced per title found.

        :param response: page response; ``response.url`` is passed to
            ``Spider_Django.django_sql().is_crawled`` before any parsing.
        :returns: a list of ``Cos8Item`` objects (empty when the page has no
            title), or ``None`` when the ``is_crawled`` check is falsy.
        """
        crawl_db = Spider_Django.django_sql()
        # NOTE(review): parsing runs when ``is_crawled`` is truthy and the
        # "is crawled" message is printed when it is falsy, which looks
        # inverted relative to the helper's name -- confirm its contract.
        if not crawl_db.is_crawled(response.url):
            print(response.url + " is crawled !")
            return None

        selector = Selector(response)
        image_paths = selector.xpath('//img[@id="bigimg"]/@src').extract()
        titles = selector.xpath('//div[@class="p_box hauto tcenter"]/h1/text()').extract()

        # When several images match, only the last one is kept (unchanged
        # from the previous behaviour, where each match overwrote the field).
        image_url = (settings.base_url + image_paths[-1]) if image_paths else None

        items = []
        for title in titles:
            # A fresh item per title: previously one shared instance was
            # appended repeatedly, so every list entry aliased the same
            # object and ended up carrying the last title.
            item = Cos8Item()
            if image_url is not None:
                item["img_base_url"] = [image_url]
                item["html_base_url"] = response.url
            item["img_content"] = [title]
            items.append(item)
        return items