def Get_Imag_URL(self, response):
    """Collect the real image address and the title from a detail page.

    A single ``Cos8Item`` is filled from ``response``:

    * ``img_base_url``  -- one-element list with the ``src`` of the last
      ``<img>`` matched by ``//p[@align="center"]/a/img`` (used as-is, no
      base URL is prepended).
    * ``html_base_url`` -- ``response.url``; only set when at least one image
      matched.
    * ``img_content``   -- one-element list with the last text node matched
      by ``//h1[@class="gb-final-tit-article"]``.

    Each matched image URL and the page URL are echoed to stdout.

    NOTE(review): the ``return items`` statement is commented out in the
    source, so this callback always returns ``None`` and the populated item
    is discarded. Confirm whether that is intentional.
    NOTE(review): another definition named ``Get_Imag_URL`` appears later in
    the source; if both live in the same scope the later one replaces this
    one.
    """
    crawl_db = Spider_Django.django_sql()

    # NOTE(review): a truthy is_crawled() result is what lets the page be
    # processed, while the falsy branch reports "is crawled". Confirm the
    # helper's return convention against its implementation.
    if not crawl_db.is_crawled(response.url):
        print(response.url + " is crawled !")
        return

    sel = Selector(response)
    img_urls = sel.xpath('//p[@align="center"]/a/img/@src').extract()
    titles = sel.xpath('//h1[@class="gb-final-tit-article"]/text()').extract()

    items = []
    item = Cos8Item()

    # One shared item is reused for every match, so the last match wins.
    for img_real_url in img_urls:
        item["img_base_url"] = [img_real_url]
        item["html_base_url"] = response.url
        print("=====================")
        print(img_real_url)
        print(response.url)

    for title in titles:
        item["img_content"] = [title]

    items.append(item)
    # Disabled in the source; `items` is kept so it can be re-enabled.
    # return items
def Get_Imag_URL(self, response):
    """Collect the real image address and the title from a detail page.

    A single ``Cos8Item`` is filled from ``response``:

    * ``img_base_url``  -- one-element list with ``settings.base_url``
      prepended to the ``src`` of the last ``<img id="bigimg">`` matched.
    * ``html_base_url`` -- ``response.url``; only set when at least one image
      matched.
    * ``img_content``   -- one-element list with the last text node matched
      by ``//div[@class="p_box hauto tcenter"]/h1``.

    Returns:
        A one-element list containing the item when the page is processed,
        or ``None`` when the page is skipped.
    """
    crawl_db = Spider_Django.django_sql()

    # NOTE(review): a truthy is_crawled() result is what lets the page be
    # processed, while the falsy branch reports "is crawled". Confirm the
    # helper's return convention against its implementation.
    if not crawl_db.is_crawled(response.url):
        print(response.url + " is crawled !")
        return None

    sel = Selector(response)
    img_paths = sel.xpath('//img[@id="bigimg"]/@src').extract()
    titles = sel.xpath('//div[@class="p_box hauto tcenter"]/h1/text()').extract()

    item = Cos8Item()

    # One shared item is reused for every match, so the last match wins.
    for relative_img_url in img_paths:
        item["img_base_url"] = [settings.base_url + relative_img_url]
        item["html_base_url"] = response.url

    for title in titles:
        item["img_content"] = [title]

    return [item]