Exemplo n.º 1
0
def get_sale_items():
    """Scrape discounted super-automatic machines from espressocanada.com.

    Loads the Saeco/Philips/Jura collection page through
    ``scanutil.get_soup`` and builds one record for every ``span.sale``
    element that does not contain a ``span.sold_out`` marker.

    Returns:
        A list of dicts with the keys ``type``, ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``. Prices are
        whatever ``scanutil.get_money`` returns for the scraped text.
    """
    listing_url = "https://espressocanada.com/collections/saeco-philips-and-jura-superautomatic-coffee-machines"
    site = 'espressocanada.com'
    listing = scanutil.get_soup(listing_url)

    found = []
    for badge in listing.select('span.sale'):
        # NOTE(review): the sold-out marker is looked up inside the sale span
        # itself here, not in its parent -- confirm against the page markup.
        if badge.select('span.sold_out'):
            continue

        info = badge.parent
        title = info.select('span.title')[0].text.strip()

        # Image and link live two levels above the span's parent.
        card = info.parent.parent
        img_src = card.select('img.noscript')[0]['src'].strip()
        href = card.select('a')[0]['href'].strip()

        was = scanutil.get_money(badge.select('span.was_price')[0].text)
        now = scanutil.get_money(badge.select('span.money')[0].text)

        found.append({
            'type': 'superauto',
            'name': title,
            'image': f"https:{img_src}",
            'url': f"https://espressocanada.com{href}",
            'regular_price': was,
            'sale_price': now,
            'website': site,
        })

    return found
def get_sale_items_for(page_type):
    """Scrape sale items from a consiglioskitchenware.com collection.

    Args:
        page_type: Collection slug inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per non-empty
        ``div.price--compare-at`` element found across all result pages.
    """
    site = 'consiglioskitchenware.com'
    listing_url = f"https://www.consiglioskitchenware.com/collections/{page_type}?view=view-48"
    listing = scanutil.get_soup(listing_url)

    # The number of pagination links is used as the page count; a listing
    # without any such links is treated as a single page.
    page_count = len(listing.select('a.pagination--item')) or 1

    found = []
    for page_no in range(1, page_count + 1):
        # Page 1 is the listing that was already fetched above.
        if page_no != 1:
            listing = scanutil.get_soup(f"{listing_url}&page={page_no}")

        for compare_at in listing.select('div.price--compare-at'):
            # An empty compare-at price means the product is not discounted.
            if compare_at.text.strip() == "":
                continue

            details = compare_at.parent.parent
            tile = details.parent
            # NOTE(review): the link check is made on the outer tile while the
            # URL below is read from the inner details element -- confirm.
            if not tile.select('a'):
                continue

            title = details.select('h2')[0].text.strip()
            img_src = tile.select('img')[0]['src'].strip()
            href = details.select('a')[0]['href'].strip()
            was = scanutil.get_money(compare_at.text)
            now = scanutil.get_money(details.select('div.price--main')[0].text)

            found.append({
                'name': title,
                'image': f"https:{img_src}",
                'url': f"https://consiglioskitchenware.com{href}",
                'regular_price': was,
                'sale_price': now,
                'website': site,
            })

    return found
Exemplo n.º 3
0
def get_sale_items_for(page_type):
    """Scrape sale items from an espressotec.com category page.

    Args:
        page_type: Category path segment inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``span.sale-text`` element on the page.
    """
    site = 'espressotec.com'
    listing = scanutil.get_soup(
        f"https://www.espressotec.com/{page_type}?sort=pricedesc&limit=100&mode=4")

    def build_record(flag):
        # Image and link sit two levels above the sale flag; title and
        # prices are looked up one level further out.
        media = flag.parent.parent
        card = media.parent

        title = card.select('h4')[0].text.strip()
        img_src = media.select('img')[0]['src'].strip()
        href = media.select('a')[0]['href'].strip()
        was = scanutil.get_money(card.select('span.price--non-sale')[0].text)
        now = scanutil.get_money(card.select('span.price--withoutTax')[0].text)

        return {
            'name': title,
            'image': img_src,
            'url': href,
            'regular_price': was,
            'sale_price': now,
            'website': site,
        }

    return [build_record(flag) for flag in listing.select('span.sale-text')]
Exemplo n.º 4
0
def get_sale_items_for(page_type):
    """Scrape sale items from a greenbeanery.ca collection page.

    Args:
        page_type: Collection slug inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``span.sale`` element whose parent has no ``span.sold_out`` marker.
    """
    site = 'greenbeanery.ca'
    listing = scanutil.get_soup(f"https://greenbeanery.ca/collections/{page_type}")

    found = []
    for badge in listing.select('span.sale'):
        info = badge.parent
        if info.select('span.sold_out'):
            continue

        title = info.select('span.title')[0].text.strip()

        card = info.parent.parent
        img_src = card.select('img')[0]['data-src'].strip()
        href = card.select('a')[0]['href'].strip()

        # Within the sale span the second money element is taken as the
        # regular price and the first as the sale price.
        amounts = badge.select('span.money')
        was = scanutil.get_money(amounts[1].text)
        now = scanutil.get_money(amounts[0].text)

        found.append({
            'name': title,
            'image': f"https:{img_src}",
            'url': f"https://greenbeanery.ca{href}",
            'regular_price': was,
            'sale_price': now,
            'website': site,
        })

    return found
def get_sale_items_for(page_type):
    """Scrape sale items from a homecoffeesolutions.com collection.

    The listing is downloaded with ``requests`` and parsed with
    BeautifulSoup; every results page is visited and one record is built
    per ``span.price--highlight`` element.

    Args:
        page_type: Collection slug inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``.
    """
    base_url = f"https://www.homecoffeesolutions.com/collections/{page_type}"
    page = requests.get(base_url)
    soup = BeautifulSoup(page.content, 'html.parser')
    website = 'homecoffeesolutions.com'

    pages = soup.select('div.pagination__nav a')
    if pages:
        # The original author observed the number in the last pagination link
        # to be double the real page count (cause unknown), hence the halving.
        # Clamp to 1 so the already-downloaded first page is never skipped.
        num_pages = max(1, int(pages[-1].text) // 2)
    else:
        num_pages = 1

    sale_items = []

    for page_num in range(1, num_pages + 1):
        # Page 1 was fetched above; only later pages need another request.
        if page_num > 1:
            page = requests.get(f"{base_url}?page={page_num}")
            soup = BeautifulSoup(page.content, 'html.parser')

        for sale_price in soup.select('span.price--highlight'):
            info = sale_price.parent.parent
            link = info.select('a')[0]

            sale_item = {
                'name': link.text.strip(),
                # The scheme separator was missing here ("https" + src),
                # unlike every sibling scraper, producing "https//..." URLs.
                'image': f"https:{info.parent.parent.select('img')[0]['src'].strip()}",
                'url': f"https://homecoffeesolutions.com{link['href'].strip()}",
                'regular_price': scanutil.get_money(
                    info.select('span.price--compare')[0].text),
                'sale_price': scanutil.get_money(sale_price.text),
                'website': website,
            }
            sale_items.append(sale_item)

    return sale_items
Exemplo n.º 6
0
def get_sale_items_for(page_type):
    """Scrape sale items from an espressoplanet.com category page.

    Args:
        page_type: Category path segment inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``span.market-price-value`` element on the page.
    """
    site = 'espressoplanet.com'
    listing = scanutil.get_soup(
        f"https://www.espressoplanet.com/coffee-espresso/{page_type}/?sort=orderby&sort_direction=0&objects_per_page=1000")

    found = []
    for market_price in listing.select('span.market-price-value'):
        price_box = market_price.parent.parent
        row = price_box.parent

        # The first link in the row supplies both the name and the href.
        title = row.select('a')[0].text.strip()
        img_src = row.parent.select('img')[0]['data-src'].strip()
        href = row.select('a')[0]['href'].strip()

        was = scanutil.get_money(market_price.text.strip())
        now = scanutil.get_money(price_box.select('span.currency')[0].text.strip())

        found.append({
            'name': title,
            'image': img_src,
            'url': f"https://www.espressoplanet.com/{href}",
            'regular_price': was,
            'sale_price': now,
            'website': site,
        })

    return found
Exemplo n.º 7
0
def get_sale_items_for(page_type, url_fragment):
    """Scrape sale items from a faema.ca listing page.

    Args:
        page_type: Not used by this function.
        url_fragment: Path segment inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``div.product_saleprice`` element on the page.
    """
    site = 'faema.ca'
    listing = scanutil.get_soup(
        f"https://www.faema.ca/{url_fragment}?searching=Y&sort=13&cat=1915&show=1000&page=1")

    def build_record(sale_div):
        # Prices are looked up three levels above the sale-price div; the
        # title link and image are looked up two levels further out.
        price_area = sale_div.parent.parent.parent
        product = price_area.parent.parent

        title = product.select('a')[0]['title'].strip()
        img_src = product.select('img')[0]['src'].strip()
        href = product.select('a')[0]['href'].strip()
        was = scanutil.get_money(
            price_area.select('div.product_productprice')[0].text)
        now = scanutil.get_money(sale_div.text)

        return {
            'name': title,
            'image': f"https://faema.ca{img_src}",
            'url': href,
            'regular_price': was,
            'sale_price': now,
            'website': site,
        }

    return [build_record(div) for div in listing.select('div.product_saleprice')]
Exemplo n.º 8
0
def get_sale_items():
    """Scrape sale items from the wholelattelove.ca coffee-maker collection.

    Pages are requested one after another until a page contains no
    ``div.product-wrap`` element.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``span.sale`` element whose parent has no ``span.sold_out`` marker.
    """
    listing_url = "https://wholelattelove.ca/collections/all-coffee-makers?_=pf"
    site = 'wholelattelove.ca'

    found = []
    page_no = 1
    listing = scanutil.get_soup(listing_url)

    # A page without any product wrapper marks the end of the listing.
    while listing.select('div.product-wrap'):
        for badge in listing.select('span.sale'):
            info = badge.parent
            if info.select('span.sold_out'):
                continue

            title = info.select('h2')[0].text.strip()

            card = info.parent.parent
            img_src = card.select('img')[0]['data-src'].strip()
            href = card.select('a')[0]['href'].strip()

            # Second money element is taken as the regular price, the first
            # as the sale price.
            amounts = badge.select('span.money')
            was = scanutil.get_money(amounts[1].text)
            now = scanutil.get_money(amounts[0].text)

            found.append({
                'name': title,
                'image': f"https:{img_src}",
                'url': f"https://wholelattelove.ca{href}",
                'regular_price': was,
                'sale_price': now,
                'website': site,
            })

        page_no += 1
        listing = scanutil.get_soup(f"{listing_url}&page={page_no}")

    return found
Exemplo n.º 9
0
def get_sale_items_for(page_type):
    """Scrape sale items from a caffetech.com collection.

    Args:
        page_type: Collection slug inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per
        ``span.product-item__price--new`` element across all result pages.
    """
    site = 'caffetech.com'
    listing_url = f"https://caffetech.com/collections/{page_type}"
    listing = scanutil.get_soup(listing_url)

    # The text of the last pagination link is read as the page count.
    nav_links = listing.select('ul.pagination__nav a')
    page_count = int(nav_links[-1].text) if nav_links else 1

    found = []
    for page_no in range(1, page_count + 1):
        # The first page is the listing already fetched above.
        if page_no != 1:
            listing = scanutil.get_soup(f"{listing_url}?page={page_no}")

        for new_price in listing.select('span.product-item__price--new'):
            info = new_price.parent

            # The name joins the h4 text and the first link's text.
            heading = info.select('h4')[0].text.strip()
            link_text = info.select('a')[0].text.strip()

            # The second image inside the enclosing element is the one used.
            img_src = info.parent.select('img')[1]['src'].strip()
            href = info.select('a')[0]['href'].strip()

            was = scanutil.get_money(
                info.select('span.product-item__price--old')[0].text)
            now = scanutil.get_money(new_price.text)

            found.append({
                'name': f"{heading} - {link_text}",
                'image': f"https:{img_src}",
                # NOTE(review): product links use http:// although the listing
                # itself is fetched over https -- confirm this is intended.
                'url': f"http://caffetech.com{href}",
                'regular_price': was,
                'sale_price': now,
                'website': site,
            })

    return found
Exemplo n.º 10
0
def get_sale_items_for(page_type):
    """Scrape sale items from an espressodolce.ca espresso-machine category.

    Args:
        page_type: Sub-category slug inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per ``del``
        element found across all result pages.
    """
    base_url = f"https://espressodolce.ca/product-category/espresso-machines/{page_type}/"
    soup = scanutil.get_soup(base_url)
    website = 'espressodolce.ca'

    # The number of pagination links is used as the page count.
    # NOTE(review): this presumably only holds for short paginations --
    # verify against a category with many pages.
    pages = soup.select('ul.page-numbers a')
    num_pages = len(pages) if pages else 1

    sale_items = []

    for page_num in range(1, num_pages + 1):
        if page_num > 1:
            # base_url already ends with "/", so no extra slash is added
            # here (the previous form produced ".../{page_type}//page/N").
            soup = scanutil.get_soup(f"{base_url}page/{page_num}")

        for orig_price in soup.select('del'):
            price_box = orig_price.parent
            info = price_box.parent

            sale_item = {
                'name': info.select('p.name')[0].text.strip(),
                'image': info.parent.parent.select('img')[0]['src'].strip(),
                # The second link inside the info element is the one used.
                'url': info.select('a')[1]['href'].strip(),
                'regular_price': scanutil.get_money(orig_price.text),
                'sale_price': scanutil.get_money(price_box.select('ins')[0].text),
                'website': website,
            }
            sale_items.append(sale_item)

    return sale_items
Exemplo n.º 11
0
def get_sale_items_for(page_type):
    """Scrape sale items from an idrinkcoffee.com collection.

    Args:
        page_type: Collection slug. Every value except
            ``"open-box-returns"`` is prefixed with ``consumer/`` before
            being inserted into the listing URL.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``, one per ``s``
        element found across all result pages.
    """
    site = 'idrinkcoffee.com'
    collection = page_type if page_type == "open-box-returns" else f"consumer/{page_type}"
    listing_url = f"https://idrinkcoffee.com/collections/{collection}"
    listing = scanutil.get_soup(listing_url)

    # The text of the last pagination link is read as the page count.
    nav_links = listing.select('div.articlePreviewPagination a')
    page_count = int(nav_links[-1].text) if nav_links else 1

    found = []
    for page_no in range(1, page_count + 1):
        # The first page is the listing already fetched above.
        if page_no != 1:
            listing = scanutil.get_soup(f"{listing_url}?page={page_no}")

        for struck in listing.select('s'):
            price_box = struck.parent
            # The element two levels up carries the product href itself.
            card = price_box.parent

            title = card.select('h2')[0].text.strip()
            img_src = card.select('img')[0]['data-src'].strip()
            href = card['href'].strip()
            was = scanutil.get_money(struck.text)
            now = scanutil.get_money(price_box.select('span')[0].text)

            found.append({
                'name': title,
                'image': f"https:{img_src}",
                'url': f"https://www.idrinkcoffee.com/{href}",
                'regular_price': was,
                'sale_price': now,
                'website': site,
            })

    return found
Exemplo n.º 12
0
def get_sale_items_for(page_type, url_fragment):
    """Scrape sale items from a zcafe.ca listing page.

    Each product image is an ``img`` element with an inline ``style``
    attribute; the image path is taken from the text between parentheses in
    that style. The regular price is not read from the listing but from the
    product page, via ``get_regular_price``.

    Args:
        page_type: Not used by this function.
        url_fragment: Path appended to ``https://www.zcafe.ca/site/``.

    Returns:
        A list of dicts with the keys ``name``, ``image``, ``url``,
        ``regular_price``, ``sale_price`` and ``website``.
    """
    base_url = f"https://www.zcafe.ca/site/{url_fragment}"
    soup = scanutil.get_soup(base_url)
    website = 'zcafe.ca'

    sale_items = []

    for image in soup.select('img[style]'):
        # Raw string: "\(" in a plain string literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        style_match = re.search(r'.*\((.*)\).*', image['style'])
        if style_match is None:
            # Styled image without a parenthesised path: skip it instead of
            # failing on ``None.group``.
            continue
        img_src = style_match.group(1)

        url = image.parent.parent.select('a')[0]['href'].strip()
        product = image.parent.parent.parent.parent

        sale_item = {
            'name': product.select('a.producttitle')[0].text.strip(),
            'image': f"https://zcafe.ca{img_src}",
            'url': url,
            # Costs one extra page fetch per product.
            'regular_price': get_regular_price(url),
            'sale_price': scanutil.get_money(product.parent.select('div')[0].text),
            'website': website,
        }
        sale_items.append(sale_item)

    return sale_items
Exemplo n.º 13
0
def get_regular_price(url):
    """Fetch a product page and return its regular price.

    Args:
        url: Address of the product page, passed to ``scanutil.get_soup``.

    Returns:
        The result of ``scanutil.get_money`` applied to the text of the first
        element matching ``#ProductInfo_Lbl_product_price_strike``.
    """
    product_page = scanutil.get_soup(url)
    strike_label = product_page.select('#ProductInfo_Lbl_product_price_strike')[0]
    return scanutil.get_money(strike_label.text)