def parse(self, response):
    """Yield an ImageScraperItem for the main image inside div.img-container.

    The item carries the image URL in ``file_urls`` and the originating
    response; nothing is yielded when no ``src`` attribute is found.
    """
    image_node = response.css('div.img-container').xpath('.//img')
    src = image_node.xpath('@src').extract_first()
    if src is None:
        return
    yield ImageScraperItem(file_urls=[src], files=[], response=response)
def _parse(response):
    """Yield a tagged item for the page's first-image src, if present.

    ``self`` and ``category`` are captured from the enclosing scope; URLs
    rejected by the spider's ignore filter are skipped, and relative srcs
    are resolved against the response URL.
    """
    src = response.xpath('//div[@class="first-image"]/img/@src').extract_first()
    if src is None:
        return
    if self.__should_ignore(src):
        return
    absolute_url = urllib.parse.urljoin(response.url, src)
    yield ImageScraperItem(tags=[category], file_urls=[absolute_url], files=[])
def _parse(response):
    """Yield a tagged item for the page's main item image (class item_main).

    ``self`` and ``category`` come from the enclosing scope; ignored URLs
    are dropped and the src is made absolute against the response URL.
    """
    src = response.xpath('//img[contains(@class, "item_main")]/@src').extract_first()
    if src is not None and not self.__should_ignore(src):
        yield ImageScraperItem(
            tags=[category],
            file_urls=[urllib.parse.urljoin(response.url, src)],
            files=[],
        )
def parse(self, response):
    """Yield one item per image found under the innerList image wrappers.

    Each item is tagged via ``url_tag_map`` keyed on the response URL;
    images with no ``src`` or with ignored URLs are skipped.
    """
    wrapped_images = response.xpath(
        '//ul[@class="innerList"]//div[@class="imgWrap"]//img'
    )
    for node in wrapped_images:
        src = node.xpath('@src').extract_first()
        if src is None or self.__should_ignore(src):
            continue
        yield ImageScraperItem(
            tags=[url_tag_map[response.url]],
            file_urls=[src],
            files=[],
        )
def _parse(response):
    """Extract the item image URL from the embedded entitledItem JSON.

    The page stores product data as JSON text inside a div whose id starts
    with ``entitledItem_``; the first entry's ``ItemImage`` is used. ``self``
    and ``category`` are captured from the enclosing scope.
    """
    blob = response.xpath(
        '//div[starts-with(@id, "entitledItem_")]/text()'
    ).extract_first()
    file_url = None
    if blob is not None:
        file_url = json.loads(blob)[0]["ItemImage"]
    if file_url is not None and not self.__should_ignore(file_url):
        yield ImageScraperItem(
            tags=[category],
            file_urls=[urllib.parse.urljoin(response.url, file_url)],
            files=[],
        )
def parse(self, response):
    """Yield a tagged item for every <post> element in a booru-style listing.

    Each post's ``@file_url`` (protocol-relative, so ``http:`` is prepended)
    becomes the item's file URL; its space-separated ``@tags`` become the
    item's tag list. Posts with a missing file URL, or one rejected by the
    ignore filter, are skipped.
    """
    for post in response.xpath('//post'):
        raw_url = post.xpath('@file_url').extract_first()
        # Guard BEFORE concatenation: the original code did
        # 'http:' + extract_first() first, which raised TypeError when
        # @file_url was absent and made the later `is not None` check dead
        # ('http:' + s can never be None).
        if raw_url is None:
            continue
        file_url = 'http:' + raw_url
        if self.__should_ignore(file_url):
            continue
        # A missing @tags attribute previously crashed on None.split();
        # treat it as an empty tag list instead.
        raw_tags = post.xpath('@tags').extract_first() or ''
        tags = [t for t in raw_tags.split(' ') if t != '']
        yield ImageScraperItem(tags=tags, file_urls=[file_url], files=[])