def get_sale_items():
    """Scrape sale-priced superautomatic machines from espressocanada.com.

    Returns a list of dicts with name, image, url, regular/sale price and
    the source website.
    """
    base_url = "https://espressocanada.com/collections/saeco-philips-and-jura-superautomatic-coffee-machines"
    soup = scanutil.get_soup(base_url)
    website = 'espressocanada.com'
    sale_items = []
    for sale_span in soup.select('span.sale'):
        # Skip listings flagged as sold out.
        if sale_span.select('span.sold_out'):
            continue
        product = sale_span.parent
        card = product.parent.parent
        sale_items.append({
            'type': 'superauto',
            'name': product.select('span.title')[0].text.strip(),
            'image': f"https:{card.select('img.noscript')[0]['src'].strip()}",
            'url': f"https://espressocanada.com{card.select('a')[0]['href'].strip()}",
            'regular_price': scanutil.get_money(sale_span.select('span.was_price')[0].text),
            'sale_price': scanutil.get_money(sale_span.select('span.money')[0].text),
            'website': website,
        })
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one collection on consiglioskitchenware.com.

    Follows every pagination link found on the first page.
    """
    base_url = f"https://www.consiglioskitchenware.com/collections/{page_type}?view=view-48"
    soup = scanutil.get_soup(base_url)
    website = 'consiglioskitchenware.com'
    page_links = soup.select('a.pagination--item')
    num_pages = len(page_links) if page_links else 1
    sale_items = []
    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            soup = scanutil.get_soup(f"{base_url}&page={page_num}")
        for orig_price in soup.select('div.price--compare-at'):
            # Ignore entries with no compare-at price or no product link.
            if orig_price.text.strip() == "" or orig_price.parent.parent.parent.select('a') == []:
                continue
            tile = orig_price.parent.parent
            sale_items.append({
                'name': tile.select('h2')[0].text.strip(),
                'image': f"https:{tile.parent.select('img')[0]['src'].strip()}",
                'url': f"https://consiglioskitchenware.com{tile.select('a')[0]['href'].strip()}",
                'regular_price': scanutil.get_money(orig_price.text),
                'sale_price': scanutil.get_money(tile.select('div.price--main')[0].text),
                'website': website,
            })
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items from one espressotec.com category page."""
    base_url = f"https://www.espressotec.com/{page_type}?sort=pricedesc&limit=100&mode=4"
    soup = scanutil.get_soup(base_url)
    website = 'espressotec.com'
    sale_items = []
    for sale_tag in soup.select('span.sale-text'):
        card = sale_tag.parent.parent
        listing = card.parent
        sale_items.append({
            'name': listing.select('h4')[0].text.strip(),
            'image': card.select('img')[0]['src'].strip(),
            'url': card.select('a')[0]['href'].strip(),
            'regular_price': scanutil.get_money(listing.select('span.price--non-sale')[0].text),
            'sale_price': scanutil.get_money(listing.select('span.price--withoutTax')[0].text),
            'website': website,
        })
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one collection on greenbeanery.ca."""
    base_url = f"https://greenbeanery.ca/collections/{page_type}"
    soup = scanutil.get_soup(base_url)
    website = 'greenbeanery.ca'
    sale_items = []
    for sale_span in soup.select('span.sale'):
        # Sold-out products still carry a sale badge; drop them.
        if sale_span.parent.select('span.sold_out'):
            continue
        tile = sale_span.parent
        card = tile.parent.parent
        prices = sale_span.select('span.money')
        sale_items.append({
            'name': tile.select('span.title')[0].text.strip(),
            'image': f"https:{card.select('img')[0]['data-src'].strip()}",
            'url': f"https://greenbeanery.ca{card.select('a')[0]['href'].strip()}",
            # The second money span is the crossed-out regular price.
            'regular_price': scanutil.get_money(prices[1].text),
            'sale_price': scanutil.get_money(prices[0].text),
            'website': website,
        })
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one collection on homecoffeesolutions.com.

    Walks every pagination page and collects each product that shows a
    highlighted (sale) price.
    """
    base_url = f"https://www.homecoffeesolutions.com/collections/{page_type}"
    page = requests.get(base_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    website = 'homecoffeesolutions.com'
    pages = soup.select('div.pagination__nav a')
    if pages:
        num_pages = int(int(pages[-1].text) / 2)  # Why is it double???
    else:
        num_pages = 1
    sale_items = []
    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            page = requests.get(f"{base_url}?page={page_num}")
            soup = BeautifulSoup(page.content, 'html.parser')
        for sale_price in soup.select('span.price--highlight'):
            sale_item = {
                'name': sale_price.parent.parent.select('a')[0].text.strip(),
                # BUG FIX: the scheme separator was missing ("https" + "//...").
                # Prepend "https:" so protocol-relative src values form a valid
                # URL, matching every other scraper in this file.
                'image': f"https:{sale_price.parent.parent.parent.parent.select('img')[0]['src'].strip()}",
                'url': f"https://homecoffeesolutions.com{sale_price.parent.parent.select('a')[0]['href'].strip()}",
                'regular_price': scanutil.get_money(
                    sale_price.parent.parent.select('span.price--compare')[0].text),
                'sale_price': scanutil.get_money(sale_price.text),
                'website': website,
            }
            sale_items.append(sale_item)
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items from one espressoplanet.com category page.

    Products on sale expose a 'market price' span; its siblings hold the
    discounted price, link and image.
    """
    base_url = f"https://www.espressoplanet.com/coffee-espresso/{page_type}/?sort=orderby&sort_direction=0&objects_per_page=1000"
    soup = scanutil.get_soup(base_url)
    website = 'espressoplanet.com'
    sale_items = []
    for market_price_value in soup.select('span.market-price-value'):
        block = market_price_value.parent.parent.parent
        link = block.select('a')[0]
        sale_items.append({
            'name': link.text.strip(),
            'image': block.parent.select('img')[0]['data-src'].strip(),
            'url': f"https://www.espressoplanet.com/{link['href'].strip()}",
            'regular_price': scanutil.get_money(market_price_value.text.strip()),
            'sale_price': scanutil.get_money(market_price_value.parent.parent.select('span.currency')[0].text.strip()),
            'website': website,
        })
    return sale_items
def get_sale_items_for(page_type, url_fragment):
    """Scrape sale items from a faema.ca search/category page."""
    base_url = f"https://www.faema.ca/{url_fragment}?searching=Y&sort=13&cat=1915&show=1000&page=1"
    soup = scanutil.get_soup(base_url)
    website = 'faema.ca'
    sale_items = []
    for sale_price in soup.select('div.product_saleprice'):
        # Climb to the product cell that carries the link, image and prices.
        cell = sale_price.parent.parent.parent.parent.parent
        link = cell.select('a')[0]
        sale_items.append({
            'name': link['title'].strip(),
            'image': f"https://faema.ca{cell.select('img')[0]['src'].strip()}",
            'url': link['href'].strip(),
            'regular_price': scanutil.get_money(sale_price.parent.parent.parent.select('div.product_productprice')[0].text),
            'sale_price': scanutil.get_money(sale_price.text),
            'website': website,
        })
    return sale_items
def get_sale_items():
    """Scrape sale items from wholelattelove.ca, paging until the grid is empty."""
    base_url = "https://wholelattelove.ca/collections/all-coffee-makers?_=pf"
    soup = scanutil.get_soup(base_url)
    website = 'wholelattelove.ca'
    sale_items = []
    page_num = 1
    while True:
        if page_num > 1:
            soup = scanutil.get_soup(f"{base_url}&page={page_num}")
        # An empty product grid means we've walked past the last page.
        if not soup.select('div.product-wrap'):
            break
        for sale_span in soup.select('span.sale'):
            if sale_span.parent.select('span.sold_out'):
                continue
            tile = sale_span.parent
            card = tile.parent.parent
            prices = sale_span.select('span.money')
            sale_items.append({
                'name': tile.select('h2')[0].text.strip(),
                'image': f"https:{card.select('img')[0]['data-src'].strip()}",
                'url': f"https://wholelattelove.ca{card.select('a')[0]['href'].strip()}",
                # Second money span is the crossed-out regular price.
                'regular_price': scanutil.get_money(prices[1].text),
                'sale_price': scanutil.get_money(prices[0].text),
                'website': website,
            })
        page_num += 1
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one collection on caffetech.com.

    Follows the numbered pagination links discovered on the first page.
    """
    base_url = f"https://caffetech.com/collections/{page_type}"
    soup = scanutil.get_soup(base_url)
    website = 'caffetech.com'
    pages = soup.select('ul.pagination__nav a')
    if pages:
        num_pages = int(pages[-1].text)
    else:
        num_pages = 1
    sale_items = []
    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            soup = scanutil.get_soup(f"{base_url}?page={page_num}")
        for sale_price in soup.select('span.product-item__price--new'):
            sale_item = {
                # Vendor heading plus product link text.
                'name': f"{sale_price.parent.select('h4')[0].text.strip()} - {sale_price.parent.select('a')[0].text.strip()}",
                'image': f"https:{sale_price.parent.parent.select('img')[1]['src'].strip()}",
                # FIX: was "http://" — the site is scraped over https (see
                # base_url) and every other scraper emits https links;
                # mixed-scheme links forced an extra redirect.
                'url': f"https://caffetech.com{sale_price.parent.select('a')[0]['href'].strip()}",
                'regular_price': scanutil.get_money(
                    sale_price.parent.select('span.product-item__price--old')[0].text),
                'sale_price': scanutil.get_money(sale_price.text),
                'website': website,
            }
            sale_items.append(sale_item)
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one espressodolce.ca product category.

    Sale products are identified by a <del> (struck-through) price whose
    sibling <ins> holds the sale price.
    """
    base_url = f"https://espressodolce.ca/product-category/espresso-machines/{page_type}/"
    soup = scanutil.get_soup(base_url)
    website = 'espressodolce.ca'
    pages = soup.select('ul.page-numbers a')
    if pages:
        # NOTE(review): len() counts every pagination anchor, which may include
        # a next-arrow link — verify against the live markup.
        num_pages = len(pages)
    else:
        num_pages = 1
    sale_items = []
    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            # FIX: base_url already ends with "/" — appending "/page/N"
            # produced a malformed "...//page/N" URL.
            soup = scanutil.get_soup(f"{base_url}page/{page_num}")
        for orig_price in soup.select('del'):
            sale_item = {
                'name': orig_price.parent.parent.select('p.name')[0].text.strip(),
                'image': orig_price.parent.parent.parent.parent.select('img')[0]['src'].strip(),
                'url': orig_price.parent.parent.select('a')[1]['href'].strip(),
                'regular_price': scanutil.get_money(orig_price.text),
                'sale_price': scanutil.get_money(orig_price.parent.select('ins')[0].text),
                'website': website,
            }
            sale_items.append(sale_item)
    return sale_items
def get_sale_items_for(page_type):
    """Scrape sale items for one idrinkcoffee.com collection.

    All collections except "open-box-returns" live under the consumer/
    path prefix.
    """
    if page_type != "open-box-returns":
        page_type = f"consumer/{page_type}"
    base_url = f"https://idrinkcoffee.com/collections/{page_type}"
    soup = scanutil.get_soup(base_url)
    website = 'idrinkcoffee.com'
    pages = soup.select('div.articlePreviewPagination a')
    num_pages = int(pages[-1].text) if pages else 1
    sale_items = []
    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            soup = scanutil.get_soup(f"{base_url}?page={page_num}")
        for orig_price in soup.select('s'):
            anchor = orig_price.parent.parent
            sale_items.append({
                'name': anchor.select('h2')[0].text.strip(),
                'image': f"https:{anchor.select('img')[0]['data-src'].strip()}",
                # NOTE(review): if href already starts with "/" this yields
                # ".com//..." — confirm against the live markup.
                'url': f"https://www.idrinkcoffee.com/{anchor['href'].strip()}",
                'regular_price': scanutil.get_money(orig_price.text),
                'sale_price': scanutil.get_money(orig_price.parent.select('span')[0].text),
                'website': website,
            })
    return sale_items
def get_sale_items_for(page_type, url_fragment):
    """Scrape sale items from a zcafe.ca listing page.

    Product images are styled with an inline CSS background-image, so the
    image URL is extracted from the style attribute; the regular price is
    fetched from each product's own page via get_regular_price().
    """
    base_url = f"https://www.zcafe.ca/site/{url_fragment}"
    soup = scanutil.get_soup(base_url)
    website = 'zcafe.ca'
    sale_items = []
    images = soup.select('img[style]')
    for image in images:
        url = image.parent.parent.select('a')[0]['href'].strip()
        # FIX: pattern is now a raw string — '\(' in a normal string literal
        # is an invalid escape sequence (SyntaxWarning on modern Python).
        # Extracts the value inside url(...) from the inline style.
        img_src = re.search(r'.*\((.*)\).*', image['style']).group(1)
        sale_item = {
            'name': image.parent.parent.parent.parent.select('a.producttitle')[0].text.strip(),
            'image': f"https://zcafe.ca{img_src}",
            'url': url,
            'regular_price': get_regular_price(url),
            'sale_price': scanutil.get_money(image.parent.parent.parent.parent.parent.select('div')[0].text),
            'website': website,
        }
        sale_items.append(sale_item)
    return sale_items
def get_regular_price(url):
    """Fetch a product page and return its struck-through regular price."""
    soup = scanutil.get_soup(url)
    strike = soup.select('#ProductInfo_Lbl_product_price_strike')[0]
    return scanutil.get_money(strike.text)