示例#1
0
 def parse(self, response):
     """Yield a ``CouponItem`` for every ``div.media`` inside ``div.index_top_box``.

     The response body is decoded and parsed with BeautifulSoup using the
     ``lxml`` parser.  An exception raised while extracting one entry is
     printed and that entry is dropped; the loop then moves on to the next.
     """
     page = BeautifulSoup(response.body.decode(), 'lxml')
     entries = page.find('div', class_='index_top_box').find_all('div', class_='media')
     for entry in entries:
         try:
             item = CouponItem()

             # Extract every value from the markup first, then fill the item.
             title = entry.find('h3', class_='each_box_header').text.strip()
             summary = entry.find('p').text.strip()
             # The anchor inside the code button supplies both the code
             # attribute and the outbound href.
             code_anchor = entry.find('div', class_='code_button').find('a')
             code = code_anchor.get('code')
             landing_url = get_real_url(self.base_url + code_anchor.get('href'))
             store_href = entry.find('p', class_='more_p_a').find('a').get('href')
             store_slug = store_href.replace('/store-coupons/', '')
             store_page = self.base_url + store_href
             store_site = get_domain_url(landing_url)
             logo = entry.find('img').get('src')
             stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

             item['type'] = 'coupon'
             item['name'] = title
             item['site'] = 'www.couponsock.com'
             item['description'] = summary
             item['verify'] = False
             item['link'] = ''
             item['coupon_type'] = 'CODE'
             item['expire_at'] = ''
             item['code'] = code
             item['final_website'] = landing_url
             item['store'] = store_slug
             item['store_url_name'] = store_page
             item['store_description'] = ''
             item['store_category'] = ''
             item['store_website'] = store_site
             item['store_country'] = 'US'
             item['store_picture'] = logo
             item['created_at'] = stamp
             yield item
         except Exception as e:
             print(e)
示例#2
0
 def parse(self, response):
     """Yield a ``CouponItem`` for each table row after the first in the third
     ``div.wp-block-column`` of the page.

     Per row, the first ``<a>`` supplies the coupon/store name and the store
     link, the second ``<td>`` the description and the third ``<td>`` the code.
     Nothing is caught here: a page with fewer than three matching columns,
     or a row missing the anchor or cells, raises out of the generator.
     """
     document = BeautifulSoup(response.body, 'lxml')
     columns = document.find_all('div', class_='wp-block-column')
     # [1:] drops the first <tr> -- presumably the table header row.
     rows = columns[2].find_all('tr')[1:]
     for row in rows:
         item = CouponItem()
         anchor = row.find('a')
         cells = row.find_all('td')

         store_name = anchor.text.strip()
         details = cells[1].text.strip()
         code = cells[2].text.strip()
         store_link = anchor.get('href')
         landing_url = get_real_url(store_link)
         store_site = get_domain_url(landing_url)
         stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

         item['type'] = 'coupon'
         item['name'] = store_name
         item['site'] = 'thevape.guide'
         item['description'] = details
         item['verify'] = False
         item['link'] = ''
         item['expire_at'] = ''
         item['coupon_type'] = 'CODE'
         item['code'] = code
         item['final_website'] = landing_url
         item['store'] = store_name
         item['store_url_name'] = store_link
         item['store_description'] = ''
         item['store_category'] = ''
         item['store_website'] = store_site
         item['store_country'] = 'US'
         item['store_picture'] = ''
         item['created_at'] = stamp
         yield item
示例#3
0
 def coupon_parse(self, response):
     """Yield a single ``CouponItem`` built from the first ``<button>`` on the page.

     Most fields come from the button's attributes (``title``,
     ``data-description``, ``data-classes``, ``data-code``, ``data-url``,
     ``data-image``).  The store name is the text of
     ``div.post-header-title span.post-title`` with ``' Coupon Codes'``
     removed; when either element is missing, the button's ``data-url`` is
     used instead.

     Raises ``AttributeError`` if the page has no ``<button>`` and
     ``TypeError`` if the button has no ``data-url``.
     """
     soup = BeautifulSoup(response.body, 'lxml')
     button = soup.find('button')
     data_url = button.get('data-url')

     coupon = CouponItem()
     coupon['type'] = 'coupon'
     coupon['name'] = button.get('title')
     coupon['site'] = 'theseedlingtruck.com'
     coupon['description'] = button.get('data-description')
     coupon['verify'] = False
     coupon['link'] = ''
     coupon['expire_at'] = ''
     # A missing data-classes attribute is treated as "no code" rather than
     # raising TypeError on `'code' in None`.
     css_classes = button.get('data-classes') or ''
     coupon['coupon_type'] = 'CODE' if 'code' in css_classes else 'DEAL'
     coupon['code'] = button.get('data-code')
     coupon['final_website'] = get_real_url(self.base_url + data_url)

     # Resolve the store title step by step so the data-url fallback also
     # applies when the header container itself is absent.
     header = soup.find('div', class_='post-header-title')
     title = None
     if header is not None:
         title = header.find('span', class_='post-title')
     if title is not None:
         coupon['store'] = title.text.replace(' Coupon Codes', '')
     else:
         coupon['store'] = data_url

     coupon['store_url_name'] = self.base_url + data_url
     coupon['store_description'] = ''
     coupon['store_category'] = 'CDB DEALS'
     coupon['store_website'] = get_domain_url(coupon['final_website'])
     coupon['store_country'] = 'US'
     coupon['store_picture'] = button.get('data-image')
     coupon['created_at'] = datetime.datetime.now().strftime(
         '%Y-%m-%d %H:%M:%S')
     yield coupon
示例#4
0
    def parse(self, response):
        """Yield a request for the next listing page, then one ``CouponItem``
        per non-expired ``div.itemdata`` offer on this page.

        Pagination: when the text of ``b.num`` is not ``'0'``, the numeric
        ``fwp_paged`` value in ``response.url`` is incremented and the
        resulting URL is requested with this method as callback.  Only the
        digits of that parameter are rewritten; the rest of the URL is left
        untouched.  If the URL carries no numeric ``fwp_paged`` value, no
        follow-up request is made.

        The store category is taken from the ``discount-codes-for/<name>/``
        segment of the URL, or ``''`` when that segment is absent.
        """
        category_match = re.search(r'discount-codes-for/(.+?)/', response.url)
        category = category_match.group(1) if category_match else ''

        soup = BeautifulSoup(response.body, 'lxml')
        exit_count = soup.find('b', class_='num').text
        if exit_count != '0':
            # Capture the whole page number (not just its first character)
            # and splice the incremented value back at that exact position,
            # so identical digits elsewhere in the URL are never altered.
            page_match = re.search(r'fwp_paged=(\d+)', response.url)
            if page_match:
                next_page = int(page_match.group(1)) + 1
                next_url = (response.url[:page_match.start(1)] +
                            str(next_page) +
                            response.url[page_match.end(1):])
                yield scrapy.Request(url=next_url, callback=self.parse)

        for offer in soup.find_all('div', class_='itemdata'):
            expired = offer.find('span',
                                 class_='wlt_shortcodes_expiry_date').text
            if 'expired' in expired:
                continue

            coupon = CouponItem()
            coupon['type'] = 'coupon'
            coupon['name'] = offer.find(
                'div', class_='titletext').find('span').text.strip()
            coupon['site'] = '420.deals'
            coupon['description'] = offer.find(
                'div', class_='excerpttext').find('p').text.strip()
            coupon['verify'] = False
            coupon['link'] = ''
            coupon['expire_at'] = ''

            button = offer.find('div', class_='clicktoreveal')
            coupon['coupon_type'] = 'DEAL' if 'Deal' in button.text else 'CODE'
            if coupon['coupon_type'] == 'DEAL':
                coupon['code'] = ''
                link = button.find('a').get('href')
            else:
                coupon['code'] = button.find('div', class_='code').text.strip()
                # For code offers the target URL is read out of the text of
                # the second sibling after the reveal button (presumably an
                # inline script assigning `href='...';`).
                link = re.findall(r"href='(.+?)';",
                                  button.next_sibling.next_sibling.text)[0]
            coupon['final_website'] = get_real_url(link)

            store_anchor = offer.find(
                'span', class_='wlt_shortcode_store').find('a')
            coupon['store'] = store_anchor.text.strip()
            coupon['store_url_name'] = store_anchor.get('href')
            coupon['store_description'] = ''
            coupon['store_category'] = category
            coupon['store_website'] = get_domain_url(coupon['final_website'])
            coupon['store_country'] = 'US'
            coupon['store_picture'] = offer.find('img').get('src')
            coupon['created_at'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            yield coupon
示例#5
0
    def coupon_parse(self, response):
        """Yield one follow-up request per non-expired ``div.coupon-box``.

        For each box inside ``div.facetwp-template`` a partially filled
        ``CouponItem`` is built and attached as ``meta['item']`` to a request
        for ``self.base_code_url % <data-couponid>``, handled by
        ``self.code_parse``.  The ``code``, ``final_website`` and
        ``store_website`` fields are not set here (presumably completed in
        ``code_parse`` -- confirm there).

        The store category is the ``discount-category/<name>/`` segment of
        ``response.url``, or ``''`` when absent.
        """
        soup = BeautifulSoup(response.body, 'lxml')
        coupon_infos = soup.find('div', class_='facetwp-template').find_all(
            'div', class_='coupon-box')

        # The category depends only on the page URL, so resolve it once.
        category_match = re.search(r'discount-category/(.+?)/', response.url)
        category = category_match.group(1) if category_match else ''

        for coupon_info in coupon_infos:
            expired = coupon_info.find('div',
                                       class_='listingexpiry').text.strip()
            if 'expired' in expired:
                continue

            coupon = CouponItem()
            coupon['type'] = 'coupon'
            try:
                coupon['name'] = coupon_info.find(
                    'div', class_='listingtitle').find('a').text.strip()
            except AttributeError:
                # Title container or its anchor is missing: keep the coupon
                # with an empty name.
                coupon['name'] = ''
            coupon['site'] = 'cannabiscouponcodes.com'
            coupon['description'] = coupon_info.find(
                'div', class_='listingsexcerpt').find('span').text.strip()
            coupon['verify'] = False
            coupon['link'] = ''
            if 'unknown' in expired:
                coupon['expire_at'] = ''
            else:
                # The expiry date is embedded in the countdown timer's
                # inline script as `var dateStr =<TAB>"...";`.
                script = coupon_info.find(
                    'div', class_='countdowntimer').find('script')
                coupon['expire_at'] = re.findall(r'var dateStr \=\t"(.+?)";',
                                                 str(script))[0]

            # The deal button's anchor supplies both the offer type (from
            # its text) and the coupon id used for the follow-up request.
            deal_anchor = coupon_info.find(
                'div', class_='main-deal-button').find('a')
            coupon['coupon_type'] = (
                'CODE' if 'Coupon' in deal_anchor.text else 'DEAL')

            coupon['created_at'] = datetime.datetime.now().strftime(
                '%Y-%m-%d %H:%M:%S')
            coupon['store_country'] = 'US'
            coupon['store_picture'] = coupon_info.find(
                'div', class_='coupon-box-logo').find('img').get('src')
            coupon['store_category'] = category
            store_anchor = coupon_info.find(
                'div', class_='listingsstore').find('a')
            coupon['store'] = store_anchor.text.strip()
            coupon['store_url_name'] = store_anchor.get('href')
            coupon['store_description'] = ''

            code_get_url = self.base_code_url % deal_anchor.get('data-couponid')
            yield scrapy.Request(url=code_get_url,
                                 callback=self.code_parse,
                                 meta={'item': coupon})
示例#6
0
    def parse(self, response):
        """Yield a ``CouponItem`` for every column block that parses cleanly.

        The last ``h4.vc_custom_heading`` of each block holds a heading of
        the form ``<label>:<code> - <description>``.  When the `` - ``
        separator is present the block is a ``CODE`` coupon; otherwise it is
        a ``DEAL`` whose description is everything after the first colon.

        Any exception raised while handling one block (missing heading,
        anchor or image, heading without a colon, ...) is printed and that
        block is skipped.
        """
        soup = BeautifulSoup(response.body, 'lxml')
        # NOTE: the trailing space in the class string is kept as-is;
        # presumably it matches the literal attribute value emitted by the
        # page builder -- confirm before "fixing" it.
        coupon_infos = soup.find_all('div', class_='vc_column-inner ')
        for coupon_info in coupon_infos:
            try:
                coupon = CouponItem()
                coupon['type'] = 'coupon'
                code_info = coupon_info.find_all(
                    'h4',
                    class_='vc_custom_heading')[-1].text.replace('\n', '')
                coupon['name'] = code_info
                coupon['site'] = 'www.cbdoilusers.com'

                code = re.findall(r':(.+?) - ', code_info)
                description = re.findall(r' - (.+?)$', code_info)
                if description:
                    # Store the whole captured description, not only its
                    # first character.
                    coupon['description'] = description[0].strip()
                else:
                    # No " - " separator: take all text after the colon.
                    # A heading without a colon raises IndexError and the
                    # block is skipped by the handler below.
                    coupon['description'] = re.findall(
                        r':(.+)', code_info)[0].strip()
                coupon['verify'] = False
                coupon['link'] = ''
                coupon['expire_at'] = ''
                coupon['code'] = code[0].strip() if code else ''

                coupon['coupon_type'] = 'CODE' if code else 'DEAL'
                coupon['final_website'] = get_real_url(
                    coupon_info.find('a').get('href'))
                store_anchor = coupon_info.find(
                    'h3', class_='vc_custom_heading').find('a')
                coupon['store'] = store_anchor.text.strip()
                coupon['store_url_name'] = store_anchor.get('href')
                coupon['store_description'] = ''
                coupon['store_category'] = 'CBD OIL'
                coupon['store_website'] = get_domain_url(
                    coupon['final_website'])
                coupon['store_country'] = 'US'
                coupon['store_picture'] = coupon_info.find('img').get('src')
                coupon['created_at'] = datetime.datetime.now().strftime(
                    '%Y-%m-%d %H:%M:%S')
                yield coupon
            except Exception as e:
                print(e)
示例#7
0
 def coupon_parse(self, response):
     """Yield one ``CouponItem`` for the ``div.store-offer-featured`` block.

     Store-level data (name, logo, store link) is read from the page's
     ``section.page-title`` and ``div.shop-logo`` elements.  The expiry is
     the countdown text with the ``'Expires in: '`` prefix removed, or ``''``
     when that text contains ``'Unlimited Time'``.  The item's ``code``
     field is not set by this method.
     """
     soup = BeautifulSoup(response.body, 'lxml')
     featured = soup.find('div', class_='store-offer-featured')
     item = CouponItem()

     headline = featured.find('h2').text.strip()

     expires = featured.find('div', class_='deal-countdown-info').text.strip()
     expires = expires.replace('Expires in: ', '')
     if 'Unlimited Time' in expires:
         expires = ''

     # Only the disabled request at the bottom of this method consumes
     # offer_id; it is still read here.
     offer_id = featured.find(
         'div', class_='featured-coupon-button').find('a').get('data-offer_id')
     affiliate_anchor = featured.find(
         'div', class_='featured-coupon-button').find('a')
     landing_url = get_real_url(affiliate_anchor.get('data-affiliate'))

     store_name = soup.find('section', class_='page-title').find('h1').text.strip()
     category = featured.find(
         'div', class_='featured-coupon-meta').find('a').text.strip()
     store_site = get_domain_url(landing_url)
     logo_src = soup.find('div', class_='shop-logo').find('img').get('src')
     store_href = soup.find('div', class_='shop-logo').find('a').get('href')
     stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

     item['type'] = 'coupon'
     item['name'] = headline
     item['site'] = 'saveoncannabis.com'
     item['description'] = ''
     item['verify'] = True
     item['link'] = ''
     item['expire_at'] = expires
     item['coupon_type'] = 'CODE'
     item['final_website'] = landing_url
     item['store'] = store_name
     item['store_description'] = ''
     item['store_category'] = category
     item['store_website'] = store_site
     item['store_country'] = 'US'
     item['store_picture'] = logo_src
     item['store_url_name'] = store_href
     item['created_at'] = stamp

     # Disabled follow-up that would have requested the code via admin-ajax:
     # yield scrapy.FormRequest(url='https://www.saveoncannabis.com/wp-admin/admin-ajax.php',
     #                          formdata={'action': 'show_code', 'offer_id': offer_id}, callback=self.code_paese,
     #                          dont_filter=True, meta={'item': main_coupon})
     yield item
示例#8
0
    def coupon_parse(self, response):
        """Yield a ``CouponItem`` for each ``div.type-coupon`` that has no
        ``p.expired_msg`` child.

        Store-level fields (name, description, picture) are read from the
        page's ``div.store`` element.  An offer is a ``DEAL`` when the link
        button's ``data-clipboard-text`` contains ``'Redeem'``; otherwise it
        is a ``CODE`` and that attribute value is used as the code.
        """
        page = BeautifulSoup(response.body, 'lxml')
        offers = page.find_all('div', class_='type-coupon')
        store_box = page.find('div', class_='store')
        for offer in offers:
            # Skip offers flagged as expired before building anything.
            if offer.find('p', class_='expired_msg'):
                continue
            item = CouponItem()

            title = offer.find('h3', class_='entry-title').find('a').text.strip()
            expires = offer.find('li', class_='expire').get('datetime')
            action = offer.find('div', class_='link-holder').find('a')
            clipboard_text = action.get('data-clipboard-text')
            if 'Redeem' in clipboard_text:
                offer_kind = 'DEAL'
                code = ''
            else:
                offer_kind = 'CODE'
                code = clipboard_text
            target = action.get('href')
            landing_url = get_real_url(target)
            store_name = store_box.find('h1').text.strip()
            store_blurb = store_box.find('div', class_='desc').text.strip()
            tags = offer.find('p', class_='tag').text.replace('Tags:', '').strip()
            store_site = get_domain_url(landing_url)
            store_logo = store_box.find('img').get('src')
            stamp = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            item['type'] = 'coupon'
            item['name'] = title
            item['site'] = 'vaping.coupons'
            item['description'] = ''
            item['verify'] = False
            item['link'] = ''
            item['expire_at'] = expires
            item['coupon_type'] = offer_kind
            item['code'] = code
            item['final_website'] = landing_url
            item['store'] = store_name
            item['store_url_name'] = target
            item['store_description'] = store_blurb
            item['store_category'] = tags
            item['store_website'] = store_site
            item['store_country'] = 'US'
            item['store_picture'] = store_logo
            item['created_at'] = stamp
            yield item
示例#9
0
 def store_page_parse(self, response):
     """Yield a ``CouponItem`` per non-expired offer strip, then the ``StoreItem``.

     Store fields are read from the ``#company-identity``,
     ``#offer-section``, ``#company-information`` and ``#merchant-stats``
     elements plus the last ``a[itemprop=item]`` breadcrumb.  Each
     ``div.offerstrip`` whose parent does not carry the ``expired`` class
     becomes a coupon that copies its store fields from the store item.

     For offers whose badge/flag text contains ``'code'``, the code itself
     is fetched with a synchronous ``requests.get`` against
     ``self.code_url`` (with its ``code_id`` / ``long_id`` placeholders
     substituted) and taken from the first ``div.coupon-code`` in the reply.
     """
     soup = BeautifulSoup(response.body, 'lxml')
     store_item = StoreItem()
     # Locate and extract the store fields.
     store_item['type'] = 'store'
     store_item['logo_url'] = 'https:' + soup.find(
         'div', id='company-identity').a.img.get('src')
     store_item['title'] = soup.find(
         'div', id='offer-section').find('strong').text.strip()
     store_item['name'] = store_item['title']
     store_item['site'] = 'offers'
     store_item['url_name'] = response.url.split('/')[-2]
     store_item['description'] = soup.find(
         'div', id='company-information').find('p').text
     store_item['category'] = soup.find_all(
         'a', itemprop='item')[-1].find('span').text
     store_item['website'] = get_real_url(
         self.base_url +
         soup.find('div', id='company-identity').a.get('href'))
     store_item['country'] = "US"
     # NOTE(review): this stores a scrapy.Field instance as the value;
     # looks like a placeholder -- confirm what consumers expect here.
     store_item['picture'] = scrapy.Field()
     store_item['coupon_count'] = soup.find(
         'div', id='merchant-stats').find('tr').find('span').text
     store_item['created_at'] = datetime.datetime.now().strftime(
         '%Y-%m-%d %H:%M:%S')
     # NOTE(review): helper name is spelled `get_domin_url` here; verify it
     # exists under that name in this module's imports.
     store_item['final_website'] = get_domin_url(store_item['website'])
     # Debug output for store URLs that did not resolve to a usable value.
     if store_item['final_website'] in ('', None, '#',
                                        'https://www.offers.com'):
         print(store_item['final_website'])

     # Coupons
     for offer in soup.find_all('div', class_='offerstrip'):
         # A parent without a class attribute yields None; treat it as
         # "not expired" rather than failing on `in None`.
         if 'expired' in (offer.parent.get('class') or []):
             continue
         coupon_item = CouponItem()
         coupon_item['type'] = 'coupon'
         coupon_item['name'] = offer.find('h3', class_='name').text.strip()
         coupon_item['site'] = 'offers'
         description = offer.find('div', class_='more-details')
         coupon_item['description'] = description.find(
             'p').text.strip() if description else ""

         # 'Y' only when span.verified > strong reads exactly "Verified".
         verified_span = offer.find('span', class_='verified')
         verified_label = None
         if verified_span is not None:
             verified_label = verified_span.find('strong')
         if verified_label is not None and verified_label.text == "Verified":
             coupon_item['verify'] = 'Y'
         else:
             coupon_item['verify'] = 'N'

         coupon_item['link'] = self.base_url + offer.find('a').get('href')
         coupon_item['expire_at'] = None

         # The offer kind is derived from the badge and flag texts; both
         # elements are optional, so coupon_type is always bound.
         badge = offer.find('div', class_='badge-text')
         flag = offer.find('span', class_='dolphin flag')
         coupon_type = (badge.text if badge else '') + (
             flag.text if flag else '')

         if 'code' in coupon_type:
             data_offer_id = offer.get('data-offer-id')
             long_id = coupon_item['link'].split('/')[-2]
             code_get_url = self.code_url.replace(
                 'code_id', data_offer_id).replace('long_id', long_id)
             # NOTE(review): blocking HTTP call with no timeout inside the
             # callback -- consider a follow-up scrapy.Request instead.
             res = requests.get(code_get_url, headers=get_header())
             code = re.findall(r'<div class="coupon-code">(.+?)</div>',
                               res.content.decode())
             coupon_item['code'] = code[0] if code else ''
             coupon_item['coupon_type'] = "CODE"
         else:
             coupon_item['coupon_type'] = "DEAL"
             coupon_item['code'] = ''
         coupon_item['final_website'] = store_item['final_website']
         coupon_item['store'] = store_item['title']
         coupon_item['store_url_name'] = store_item['url_name']
         coupon_item['store_description'] = store_item['description']
         coupon_item['store_category'] = store_item['category']
         coupon_item['store_website'] = store_item['website']
         coupon_item['store_country'] = "US"
         coupon_item['store_picture'] = store_item['logo_url']
         coupon_item['created_at'] = datetime.datetime.now().strftime(
             '%Y-%m-%d %H:%M:%S')
         coupon_item['status'] = '0'
         yield coupon_item
     yield store_item