def parse(self, response):
    """Build one ``CrawlCjmallItem`` per product entry found in *response*.

    Every node matched by the result-list CSS path is turned into an item
    with the fields ``id``, ``title``, ``price``, ``image_url`` and
    ``link``.  An entry whose extraction fails is logged as a warning and
    skipped, so a single broken entry does not discard the whole page.

    Args:
        response: object exposing ``css(path)`` that returns an iterable
            of selectors (presumably a Scrapy response -- confirm
            against the caller).

    Returns:
        list: the items that were extracted successfully, in page order.
    """
    items = []
    css_path = ('#cjo_content > div '
                '> div.searchArea > div.galleryType.domainTwo '
                '> ul > li')
    for sel in response.css(css_path):
        item = CrawlCjmallItem()
        try:
            # NOTE(review): the last argument of extract()/extract_re() is
            # presumably the fallback used when nothing matches -- confirm
            # against the helpers' definitions.
            path = 'div.domainTxt > div.name > a::attr(href)'
            item['id'] = extract_re(sel, path, r'item_cd=(\d*)', '')

            path = 'div.domainTxt > div.name > a::text'
            item['title'] = extract(sel, path, '')

            path = ('div.domainTxt > div.price '
                    '> span > span.blue > strong::text')
            price = extract(sel, path, 0)
            path = ('div.domainTxt > div.price '
                    '> span > span > strong::text')
            origin_price = extract(sel, path, 0)
            # Prefer the "blue" price; use the other one when it is falsy.
            item['price'] = price or origin_price

            path = 'a > img::attr(src)'
            item['image_url'] = extract(sel, path, '')

            path = 'div.domainTxt > div.name > a::attr(href)'
            # Named link_re so the standard ``re`` module is not shadowed.
            link_re = r'viewDetailItem\(\'\s*(.*)'
            item['link'] = self.BASE_URL + extract_re(sel, path, link_re, '')

            items.append(item)
        except IndexError:
            msg = 'Out of index, Keyword: {0}'.format(self.keyword)
            log.msg(msg, level=log.WARNING, spider=self)
            log.msg(str(item), level=log.WARNING, spider=self)
        except Exception as err:
            # Exception (not a bare except) so that KeyboardInterrupt,
            # SystemExit and GeneratorExit still propagate.
            msg = 'Unknown Error, Keyword: {0}'.format(self.keyword)
            log.msg(msg, level=log.WARNING, spider=self)
            # Record what actually went wrong, not just that something did.
            log.msg(repr(err), level=log.WARNING, spider=self)
    return items
def parse(self, response):
    """Build one ``CrawlCoupangItem`` per ``#productList > li`` node.

    Each matched node is turned into an item with the fields ``id``,
    ``title``, ``price``, ``condition``, ``image_url`` and ``link``.  An
    entry whose extraction fails is logged as a warning and skipped, so a
    single broken entry does not discard the whole page.

    Args:
        response: object exposing ``css(path)`` that returns an iterable
            of selectors (presumably a Scrapy response -- confirm
            against the caller).

    Returns:
        list: the items that were extracted successfully, in page order.
    """
    items = []
    for sel in response.css('#productList > li'):
        item = CrawlCoupangItem()
        try:
            # NOTE(review): the last argument of extract() is presumably
            # the fallback used when nothing matches -- confirm against
            # the helper's definition.
            path = '*::attr(id)'
            item['id'] = extract(sel, path, '')

            path = 'a.detail-link > strong.title > em::text'
            item['title'] = extract(sel, path, '')

            path = ('a.detail-link > em.prod-price '
                    '> span.price-detail > strong.price '
                    '> em::text')
            item['price'] = extract(sel, path, 0)

            path = 'a.detail-link > span.condition > em::text'
            item['condition'] = extract(sel, path, 0)

            path = 'a.detail-link > img::attr(src)'
            item['image_url'] = extract(sel, path, '')

            path = 'a.detail-link::attr(href)'
            item['link'] = self.BASE_URL + extract(sel, path, '')

            items.append(item)
        except IndexError:
            msg = 'Out of index, Keyword: {0}'.format(self.keyword)
            log.msg(msg, level=log.WARNING, spider=self)
            log.msg(str(item), level=log.WARNING, spider=self)
        except Exception as err:
            # Exception (not a bare except) so that KeyboardInterrupt,
            # SystemExit and GeneratorExit still propagate.
            msg = 'Unknown Error, Keyword: {0}'.format(self.keyword)
            log.msg(msg, level=log.WARNING, spider=self)
            # Record what actually went wrong, not just that something did.
            log.msg(repr(err), level=log.WARNING, spider=self)
    return items