示例#1
0
 def parse(self, response):
     """Build one CouponsItem from a product page.

     The description is read from ``div#full-description-text`` and falls
     back to ``div.banner-club-big-text-box`` when the first lookup cleans
     to something falsy. The title is the cleaned product title plus
     subtitle; when ``similar`` scores that text at 0.9 or less against the
     cleaned ``td.product-list-checkboxes`` text, the checkbox text and any
     ``'PriceDiscount'`` value found in the page body are appended too.
     """
     description = cleanString(
         response.css("div#full-description-text").extract())
     if not description:
         description = cleanString(
             response.css("div.banner-club-big-text-box").extract())

     # Cleaned once and reused for both the title and the brand lookup.
     product_title = cleanString(response.css("h1.productTitle").extract())
     greenbox = product_title + cleanString(
         response.css("div.productSubTitle").extract())
     big_redbox = cleanString(
         response.css("td.product-list-checkboxes").extract())

     if similar(greenbox, big_redbox) > 0.9:
         title = greenbox
     else:
         # Run the search a single time instead of converting the body and
         # evaluating the same pattern twice. The pattern is still applied
         # to str(response.body), exactly as before.
         price_match = re.search(r"'PriceDiscount':\s'\d{1,}'",
                                 str(response.body))
         low_price = price_match.group(0) if price_match else ''
         title = greenbox + self.integrator + big_redbox + low_price.replace(
             "'PriceDiscount':", '')

     yield CouponsItem(
         Title=title,
         supplier='16',
         brand=filterBrands(product_title, self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(ext in (description + title)
                                for ext in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#2
0
 def parse_item(self, response):
     """Yield one CouponsItem for a category page that has a description.

     Pages where the ``LabelCategoryDescription`` element is absent yield
     nothing.
     """
     if response.css(
             '#ctl00_ContentPlaceHolder2_LabelCategoryDescription'
     ).extract_first() is None:
         return

     # ``::text`` selects nothing when the element exists but has no direct
     # text node, so ``.get()`` would return None and the string
     # concatenations below would raise TypeError. Default to '' instead.
     title = response.css(
         '#ctl00_ContentPlaceHolder2_LabelCategoryName::text').get(
             default='')
     description = response.css(
         '#ctl00_ContentPlaceHolder2_LabelCategoryDescription::text'
     ).get(default='') + cleanString(
         response.css(
             '#ctl00_ContentPlaceHolder2_LabelCategoryText').extract())

     yield CouponsItem(
         Title=title,
         supplier='998',
         # The brand is matched against the cleaned category name.
         brand=filterBrands(cleanString(title), self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(ext in (description + title)
                                for ext in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#3
0
文件: yours.py 项目: alleneben/scrapy
 def parse(self, response):
     """Fetch the details of one benefit and yield it as a CouponsItem.

     The benefit id is captured from the response URL; URLs that do not
     match ``https://www.yours.co.il/benefits/<id>/`` yield nothing. The
     details are requested with a JSON POST to ``self.getBenefitDetails``
     and the first entry of the returned ``benefits`` list is used.
     """
     m = re.search(r'https://www\.yours\.co\.il/benefits/(.+)/',
                   response.url)
     if not m:
         return

     benefit_id = m.group(1)
     formdata = {
         'club_id': f'{self.siteUuid}',
         'benefits_id': f'{benefit_id}'
     }
     # NOTE(review): this is a synchronous call with no timeout argument.
     r = requests.post(self.getBenefitDetails, json=formdata)
     if r.status_code != 200:
         # status_code is an int; concatenating it to a str raised
         # TypeError, so it is passed as a logging argument instead.
         self.logger.error("something went wrong:::%s", r.status_code)
         return

     benefits = r.json().get('benefits')
     if not benefits:
         # A missing or empty list would otherwise fail on ``[0]``.
         self.logger.error("no benefits returned for %s", benefit_id)
         return

     data = benefits[0]
     description = cleanString(data['benefits_description'])
     title = cleanString(data['benefits_name'])
     yield CouponsItem(
         Title=title,
         supplier='993',
         brand=filterBrands(description, self.brands),
         JoinUrl=self.linkBase + data['benefits_id'],
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(ext in (description + title)
                                for ext in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#4
0
 def parse_coupon(self, response):
     """Yield one CouponsItem for a coupon page that has an ``i.cafe_logo``.

     Pages without that element yield nothing. The join URL is built by
     formatting ``self.linkBase`` with ``response.meta['d_number']``.
     """
     if response.css('i.cafe_logo').extract_first() is None:
         return

     description = cleanString(response.css('span.small_text').extract())
     title = cleanString(response.css('div.richtext_div').extract_first())

     # The last three characters of the cleaned heading are dropped before
     # the brand lookup.
     heading = cleanString(response.css('div.d_section > h1::text').get())
     brand = filterBrands(heading[:-3], self.brands)
     join_url = self.linkBase.format(response.meta['d_number'])

     yield CouponsItem(
         Title=title,
         supplier='990',
         brand=brand,
         JoinUrl=join_url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(keyword in (description + title)
                                for keyword in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#5
0
 def parse(self, response):
     """Yield one CouponsItem for a page with a ``.benefit-details-txt`` node.

     Pages without that node yield nothing. The cleaned ``div.benefit-info``
     heading text serves as both the title and the brand lookup input.
     """
     if response.css('.benefit-details-txt').extract_first() is None:
         return

     description = cleanString(
         response.css("div.benefit-details-txt").extract())
     title = cleanString(
         response.css("div.benefit-info h1::text").extract_first())

     yield CouponsItem(
         Title=title,
         supplier='996',
         brand=filterBrands(title, self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(keyword in (description + title)
                                for keyword in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#6
0
 def parse(self, response):
     """Yield one CouponsItem built from a product page.

     The description comes from the sixth ``li`` of ``ul.product-info`` and
     the title from the text of ``#ptitle``; the cleaned title is also the
     brand lookup input.
     """
     info_item = response.css('ul.product-info li:nth-child(6)')
     description = cleanString(info_item.extract())
     title = cleanString(response.css("#ptitle::text").get())

     item = CouponsItem(
         Title=title,
         supplier='994',
         brand=filterBrands(title, self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(keyword in (description + title)
                                for keyword in allowed_shipping_list),
         cyclerun=self.cycleid)
     yield item
示例#7
0
 def parse_item(self, response):
     """Yield one CouponsItem built from a deal page.

     The brand is looked up from the percent-decoded ``brandname`` value
     carried in ``response.meta``.
     """
     description = cleanString(response.css("div.desc").extract())
     heading = response.css("div.info-box h1.title").extract_first()
     title = cleanString(heading)

     brand_name = urllib.parse.unquote(response.meta["brandname"])
     item = CouponsItem(
         Title=title,
         supplier='1',
         brand=filterBrands(brand_name, self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(keyword in (description + title)
                                for keyword in allowed_shipping_list),
         cyclerun=self.cycleid)
     yield item
示例#8
0
 def parse(self, response):
     """Yield a CouponsItem for every sale in a JSON listing response.

     The body is decoded as JSON. Nothing is yielded unless
     ``meta.totalitems`` is greater than zero. Each entry of ``data`` may
     carry a list under ``attributes.sales``; every sale becomes one item
     whose title is prefixed with the matched brand name.

     Missing or null ``meta``, ``totalitems``, ``data``, ``attributes`` and
     ``sales`` values are treated as empty instead of raising TypeError.
     """
     result = json.loads(response.body)
     total_items = (result.get('meta') or {}).get('totalitems') or 0
     if total_items <= 0:
         return

     for entry in result.get('data') or []:
         sales = (entry.get('attributes') or {}).get('sales') or []
         for sale in sales:
             description = cleanString(sale['description'])
             brandName = filterBrands(
                 cleanString(sale['brand']['title']), self.brands)
             title = cleanString(sale['title'])
             title = f'{brandName} {title}'
             yield CouponsItem(
                 Title=title,
                 supplier='2',
                 brand=brandName,
                 JoinUrl=f"https://myofer.co.il{response.meta['mallName']}brands/{sale['brand']['seoname']}/{sale['id']}",
                 Description=description,
                 ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
                 DoorToDoorShipping=any(ext in (description + title)
                                        for ext in allowed_shipping_list),
                 cyclerun=self.cycleid)
示例#9
0
 def parse_item(self, response):
     """Yield one CouponsItem for a benefit page with a non-empty title.

     Nothing is yielded unless the page contains a
     ``div.text_more_info_white.benefits`` element and the cleaned
     ``h2.page-title`` text is truthy. That cleaned text is used as the
     item title and as the brand lookup input.
     """
     has_benefits = response.css(
         'div.text_more_info_white.benefits').extract_first() is not None
     page_title = cleanString(
         response.css("h2.page-title::text").extract_first())
     # A truthy title is necessarily different from "".
     if not (has_benefits and page_title):
         return

     description = cleanString(response.css("div#main").extract())
     yield CouponsItem(
         Title=page_title,
         supplier='997',
         brand=filterBrands(page_title, self.brands),
         JoinUrl=response.url,
         Description=description,
         ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
         DoorToDoorShipping=any(keyword in (description + page_title)
                                for keyword in allowed_shipping_list),
         cyclerun=self.cycleid)
示例#10
0
 def parse(self, response):
     """Yield a CouponsItem for every special-offer link in the nav menu.

     Each ``nav.content ul.center li a`` element is re-parsed with
     BeautifulSoup. Only anchors whose ``href`` contains the dreamcard
     special-offers URL produce an item; the anchor's ``title`` attribute
     is the item title and the ``src`` of its image is the description.
     """
     for anchor_html in response.css("nav.content ul.center li a").extract():
         soup = BeautifulSoup(anchor_html, 'lxml')
         input_tag = soup.find("a")
         href = input_tag.get('href')
         # Filter before touching the image: previously a single anchor
         # without an <img> (or without an href) raised and ended the
         # generator, dropping all remaining links.
         if not href or not re.search(
                 r'https:\/\/www\.dreamcard\.co\.il\/special-offers\/', href):
             continue

         title = input_tag.get('title')
         img_tag = soup.find("img")
         # find() returns None when there is no image; None is also what
         # .get('src') gives for an <img> lacking the attribute.
         desc = img_tag.get('src') if img_tag is not None else None
         yield CouponsItem(
             Title=cleanString(title),
             supplier='101',
             brand=filterBrands(title, self.brands),
             JoinUrl=href + "&&" + str(convertToNumber(title)),
             Description=desc,
             ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
             # ``in None`` raises TypeError, so a missing title attribute
             # is checked as an empty string.
             DoorToDoorShipping=any(ext in (title or '')
                                    for ext in allowed_shipping_list),
             cyclerun=self.cycleid)
示例#11
0
    def parse(self, response):
        """Yield one CouponsItem for a benefit page.

        While ``self.starter`` is truthy, a request for ``self.home`` with
        ``anonymous_scrape`` as callback is yielded first; the flag is then
        set to False. Pages without a ``.benefitInfo_content`` element
        yield no item.
        """
        if self.starter:
            yield scrapy.Request(self.home,
                                 meta={'selenium': False},
                                 callback=self.anonymous_scrape)
        self.starter = False

        if response.css('.benefitInfo_content').extract_first() is None:
            return

        # The paragraph container differs between the two site layouts.
        if 'online.max.co.il' in response.url:
            paragraph_css = "div.richHtml p"
        else:
            paragraph_css = (
                "#ctl00_PlaceHolderMain_ctl00_divInitializeWrapperClass p")
        description = cleanString(response.css(paragraph_css).extract())
        title = cleanString(
            response.css("div.benefitInfo_content h2::text").extract_first())

        # NOTE(review): the last two branches both map to '995'.
        if re.search(r'/Biz/Pages/', response.url):
            supplier = '100'
        elif re.search(r'/BeyahadBishvilha/Pages/', response.url):
            supplier = '995'
        else:
            supplier = '995'

        # The brand is matched against the h1 text; the title uses the h2.
        brand_text = cleanString(
            response.css("div.benefitInfo_content h1::text").extract_first())
        yield CouponsItem(
            Title=title,
            supplier=supplier,
            brand=filterBrands(brand_text, self.brands),
            JoinUrl=response.url,
            Description=description,
            ScrapeDate=datetime.now().strftime("%m/%d/%Y, %H:%M:%S"),
            DoorToDoorShipping=any(keyword in (description + title)
                                   for keyword in allowed_shipping_list),
            cyclerun=self.cycleid)