Пример #1
0
def spr_scrape_deals_page():
    """Scrape promotion text from the Sprint offers page.

    Opens https://www.sprint.com/en/shop/offers.html in headless Chrome,
    parses the rendered HTML and, for every ``div`` with class
    ``ui-bucket__content``, prints the promotion fields and passes them to
    ``add_scraped_promotions_to_database``.

    The browser is always shut down with ``driver.quit()``, even when
    scraping raises part-way through.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.path.join(os.getcwd(), "chromedriver.exe")
    driver = webdriver.Chrome(chrome_options=chrome_options, executable_path=chrome_driver)

    try:
        driver.implicitly_wait(5)

        # go to website and give the page time to render
        driver.get('https://www.sprint.com/en/shop/offers.html')
        time.sleep(5)

        # parse the rendered page
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make promotions object
        scraped_promotion = ScrapedPromotion()

        # hard coded variables
        # NOTE(review): device_name is never assigned in this function, so the
        # calls below rely on ScrapedPromotion providing a default - confirm.
        scraped_promotion.provider = 'sprint'
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        scraped_promotion.device_storage = '0'
        scraped_promotion.promo_location = 'deals page'
        # use the URL the browser ended up on, in case of redirects
        scraped_promotion.url = driver.current_url

        for div in soup.findAll('div', class_='ui-bucket__content'):
            scraped_promotion.promo_text = div.text.strip().replace('\n', '')
            print(scraped_promotion.provider, scraped_promotion.device_name,  scraped_promotion.device_storage,
                  scraped_promotion.promo_location, scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                               scraped_promotion.device_storage, scraped_promotion.promo_location,
                                               scraped_promotion.promo_text, scraped_promotion.url,
                                               scraped_promotion.date, scraped_promotion.time)
    finally:
        # always release the browser and chromedriver process
        driver.quit()
Пример #2
0
def met_scrape_prepaid_promotins(soup, url, device_name, device_storage):
    """Record the crossed-out price shown on a MetroPCS device page.

    Looks for a ``span`` with class ``normal-price`` in ``soup``. When one is
    found, its cleaned-up text is passed to
    ``add_scraped_promotions_to_database`` with the location
    ``'crossed out price'``; when it is missing nothing is recorded.

    Args:
        soup: parsed device page to search.
        url: page URL stored with the promotion.
        device_name: device name stored with the promotion.
        device_storage: device storage stored with the promotion.
    """
    # promotion record with the values already known
    promo = ScrapedPromotion()
    promo.provider = 'metropcs'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # (location, text) pairs found on the page
    found = []

    # crossed out price; a missing element surfaces as AttributeError
    try:
        price_tag = soup.find('span', class_='normal-price')
        price_text = price_tag.text.strip().replace('\n', '')
        price_text = price_text.replace(
            '                                ', '.')
        found.append(('crossed out price', price_text))
    except AttributeError:
        pass

    # one database row per promotion found
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text

        # timestamp taken at the moment the row is written
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()

        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
def xfi_scrape_prepaid_promotins(url, device_name, device_storage,
                                 description):
    """Record an Xfinity device description as a single promotion.

    Fills a ``ScrapedPromotion`` with provider ``'xfinity'``, the given
    device details, ``description`` as the promotion text and
    ``'description'`` as its location, then passes the fields to
    ``add_scraped_promotions_to_database``.

    Args:
        url: page URL stored with the promotion.
        device_name: device name stored with the promotion.
        device_storage: device storage stored with the promotion.
        description: text stored as the promotion text.
    """
    promo = ScrapedPromotion()

    # values supplied by the caller plus the fixed provider/location
    promo.provider = 'xfinity'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url
    promo.promo_text = description
    promo.promo_location = 'description'

    # timestamp of this scrape
    promo.date = datetime.date.today()
    promo.time = datetime.datetime.now().time()

    # hand the record over for storage
    add_scraped_promotions_to_database(
        promo.provider,
        promo.device_name,
        promo.device_storage,
        promo.promo_location,
        promo.promo_text,
        promo.url,
        promo.date,
        promo.time,
    )
def att_scrape_postpaid_promotions(soup, url, device_name, device_storage):
    """Record the banner promotions found on an AT&T postpaid device page.

    Collects the stripped, non-empty text of every ``span`` with class
    ``offerTxt`` (location ``'upper banner'``) and the raw text of every
    ``div`` with class ``ds2MarketingMessageTextStyle`` (location
    ``'lower banner'``), then passes each one to
    ``add_scraped_promotions_to_database``.

    Args:
        soup: parsed device page to search.
        url: page URL stored with each promotion.
        device_name: device name stored with each promotion.
        device_storage: device storage stored with each promotion.
    """
    promo = ScrapedPromotion()
    promo.provider = 'att'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # upper banner text: keep only spans that contain something
    upper_texts = (
        span.text.strip() for span in soup.findAll("span", class_="offerTxt")
    )
    found = [('upper banner', text) for text in upper_texts if text != '']

    # lower banner text is stored as-is (not stripped)
    found.extend(
        ('lower banner', div.text)
        for div in soup.findAll("div", class_="ds2MarketingMessageTextStyle")
    )

    # one database row per banner
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text

        # timestamp taken at the moment the row is written
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()

        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
Пример #5
0
def cri_scrape_prepaid_promotions(driver, url, device_name, device_storage):
    """Record the sale price shown on a Cricket device page.

    Looks up the first pricing cell under ``#pricingWrapper`` through the
    live ``driver``. When the element exists its cleaned-up text is passed to
    ``add_scraped_promotions_to_database`` with the location
    ``'sale price'``; when it does not, nothing is recorded.

    Args:
        driver: WebDriver already showing the device page.
        url: page URL stored with the promotion.
        device_name: device name stored with the promotion.
        device_storage: device storage stored with the promotion.
    """
    promo = ScrapedPromotion()
    promo.provider = 'cricket'
    promo.device_name = device_name
    promo.device_storage = device_storage
    promo.url = url

    # (location, text) pairs found on the page
    found = []

    # sale price; absence of the element is not an error
    try:
        price_element = driver.find_element_by_xpath(
            '//*[@id="pricingWrapper"]/div[1]/div[1]')
        price_text = price_element.text.strip().replace('\n', '')
        price_text = price_text.replace(
            '                           ', '')
        found.append(('sale price', price_text))
    except NoSuchElementException:
        pass

    # one database row per promotion found
    for location, text in found:
        promo.promo_location = location
        promo.promo_text = text
        promo.date = datetime.date.today()
        promo.time = datetime.datetime.now().time()

        add_scraped_promotions_to_database(
            promo.provider, promo.device_name, promo.device_storage,
            promo.promo_location, promo.promo_text, promo.url,
            promo.date, promo.time)
Пример #6
0
def att_scrape_homepage():
    """Scrape promotion text from the AT&T homepage.

    Opens https://www.att.com/ in headless Chrome and passes to
    ``add_scraped_promotions_to_database``, in this order:

    1. the text of every ``div`` with class ``content-wrapper``;
    2. for every ``div`` with class ``item-wrapper`` except the first, its
       text followed by the text of each ``div`` (except the first) on the
       legal page linked from its ``legal`` block, prefixed with
       ``"LEGAL TERMS: "``;
    3. the text of every ``div`` with class ``row no-flex``.

    Items without a usable legal link, and legal pages without a ``body``,
    are skipped instead of aborting the scrape. The browser is always shut
    down with ``driver.quit()``, even when scraping raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.path.join(os.getcwd(), "chromedriver.exe")
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    try:
        driver.implicitly_wait(6)

        # go to website and give the page time to render
        driver.get('https://www.att.com/')
        time.sleep(10)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object
        scraped_promotion = ScrapedPromotion()

        # set hardcoded variables
        # NOTE(review): device_name is never assigned in this function, so the
        # calls below rely on ScrapedPromotion providing a default - confirm.
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        scraped_promotion.provider = 'att'
        scraped_promotion.promo_location = 'homepage'
        # captured before visiting any legal page, so every row keeps the
        # homepage URL
        scraped_promotion.url = driver.current_url
        scraped_promotion.device_storage = '0'

        def save_promo(promo_text):
            # store one row using the shared record and the given text
            scraped_promotion.promo_text = promo_text
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

        for slideshow in soup.findAll('div', class_='content-wrapper'):
            save_promo(slideshow.text.strip().replace('\n', ''))

        for div in soup.findAll('div', class_='item-wrapper')[1:]:
            save_promo(div.text.strip().replace('\n', ''))

            # follow the link to the legal terms, if the item has one
            item_details = div.find("div", {"class": "legal"})
            legal_anchor = item_details.a if item_details is not None else None
            legal_path = (legal_anchor.get("data-ajaxdata")
                          if legal_anchor is not None else None)
            if not legal_path:
                continue

            driver.get("https://www.att.com/" + legal_path)
            time.sleep(2)
            legal_soup = BeautifulSoup(driver.page_source, "html.parser")
            if legal_soup.body is None:
                continue
            for legal_terms in legal_soup.body.findAll("div")[1:]:
                save_promo("LEGAL TERMS: " + legal_terms.text.strip())

        for row in soup.findAll('div', class_='row no-flex'):
            save_promo(row.text.strip().replace('\n', ''))
    finally:
        # always release the browser and chromedriver process
        driver.quit()
def met_scrape_deals_page():
    """Scrape promotion text from the MetroPCS deals page.

    Opens https://www.metropcs.com/shop/deals in headless Chrome, then prints
    and passes to ``add_scraped_promotions_to_database``:

    1. the text of every first-banner ``div`` (class
       ``col-md-12 col-xs-12 p-t-30-lg p-t-10-md text-left``);
    2. the ``alt`` text of the linked image inside every ``m-b-10`` ``div``
       nested in a `` col-xs-12 col-sm-6`` ``div``.

    Promotion tiles without a linked image or ``alt`` attribute are skipped
    instead of aborting the scrape. The browser is always shut down with
    ``driver.quit()``, even when scraping raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.path.join(os.getcwd(), "chromedriver.exe")
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    try:
        driver.implicitly_wait(5)

        # go to website and give the page time to render
        driver.get('https://www.metropcs.com/shop/deals')
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object
        scraped_promotion = ScrapedPromotion()

        # set hardcoded variables
        scraped_promotion.provider = 'metropcs'
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        scraped_promotion.device_storage = '0'
        scraped_promotion.device_name = 'N/A'
        scraped_promotion.url = driver.current_url
        scraped_promotion.promo_location = 'deals page'

        def save_promo(promo_text):
            # print and store one row using the shared record
            scraped_promotion.promo_text = promo_text
            print(scraped_promotion.provider, scraped_promotion.device_name,
                  scraped_promotion.device_storage,
                  scraped_promotion.promo_location,
                  scraped_promotion.promo_text, scraped_promotion.url,
                  scraped_promotion.date, scraped_promotion.time)
            add_scraped_promotions_to_database(
                scraped_promotion.provider, scraped_promotion.device_name,
                scraped_promotion.device_storage,
                scraped_promotion.promo_location,
                scraped_promotion.promo_text, scraped_promotion.url,
                scraped_promotion.date, scraped_promotion.time)

        # get first banner
        for div in soup.findAll(
                'div',
                class_='col-md-12 col-xs-12 p-t-30-lg p-t-10-md text-left'):
            save_promo(div.text.strip().replace('\n', ''))

        # get promotions (their text lives in the image alt attribute)
        for div in soup.findAll('div', class_=' col-xs-12 col-sm-6'):
            for div1 in div.findAll('div', class_='m-b-10'):
                link = div1.a
                image = link.img if link is not None else None
                alt_text = image.get('alt') if image is not None else None
                if alt_text is None:
                    continue
                save_promo(alt_text)
    finally:
        # quit() rather than close(): close() only closes the current window
        driver.quit()
def ver_scrape_postpaid_promotions(soup, driver, url, device_name, device_storage):
    """Record the promotions shown on a Verizon postpaid device page.

    Collects, in this order:

    * the text of the first ``clearfix`` element (``'upper banner'``), when
      it is not empty;
    * every pricing option in ``soup`` whose text contains ``'was'``
      (``'crossed out price'``);
    * for each pricing option, after clicking it in ``driver`` and re-parsing
      the page: the banner above the device icon and the label below the
      price, unless that label is one of the two standard non-promotional
      texts.

    Each collected entry is passed to ``add_scraped_promotions_to_database``.
    A missing banner or price label is skipped rather than raising.

    Args:
        soup: parsed device page as initially loaded.
        driver: WebDriver already showing the same device page.
        url: page URL stored with each promotion.
        device_name: device name stored with each promotion.
        device_storage: device storage stored with each promotion.
    """
    # make object
    scraped_promotion = ScrapedPromotion()

    # set variables already determined
    scraped_promotion.provider = 'verizon'
    scraped_promotion.device_name = device_name
    scraped_promotion.device_storage = device_storage
    scraped_promotion.url = url

    # make empty list of promotions
    promotions = []

    # alternate way to get banner text
    upper_banner_text = driver.find_element_by_class_name('clearfix').text.strip()
    if upper_banner_text != '':
        promotions.append(['upper banner', upper_banner_text])

    # crossed out price
    pricing_options = soup.findAll('div', class_='pad8 noRightPad')
    for div in pricing_options:
        if 'was' in div.text:
            promotions.append(['crossed out price', div.text.replace('2-Year Contract', ' 2-Year Contract').replace('24 Monthly Payments',' 24 Monthly Payments').replace('was ', ' was')])

    # each payment option has its own banners (XPath positions are 1-based)
    for option in range(1, len(pricing_options) + 1):
        option_xpath = (
            '//*[@id="tile_container"]/div[1]/div[3]/div[1]/div/div[2]'
            '/div/div/div[1]/div/div[' + str(option) + ']/div/div/div'
        )
        option_button = driver.find_element_by_xpath(option_xpath)

        # PAYMENT LEVEL
        # click on different payment options to show different promos
        # if popup is there, remove it before clicking
        try:
            option_button.click()
        except WebDriverException:
            driver.find_element_by_class_name('fsrCloseBtn').click()
            option_button.click()
        time.sleep(2)

        # re-parse: the click changes which banners are displayed
        option_soup = BeautifulSoup(driver.page_source, "html.parser")

        # promotion text above device icon
        banner_above_icon = option_soup.find('div', class_='offersPad fontSize_12 lineHeight8')
        if banner_above_icon is not None:
            promotions.append(['banner above device icon', banner_above_icon.text.replace('Special Offer', '').replace('See the details', '').replace('\n', '')])

        # banner under price; the label is not guaranteed to be present
        price_label = option_soup.find('div', class_='row padTop6 noSideMargin priceLabel')
        if price_label is not None:
            below_price_banner = price_label.text
            if below_price_banner not in ('Retail Price', 'Early Termination Fee: $175 (2-Year Contracts)'):
                promotions.append(['banner below price', below_price_banner])

    # make object for each promo text instance
    for promo_location, promo_text in promotions:
        scraped_promotion.promo_location = promo_location
        scraped_promotion.promo_text = promo_text

        # timestamp taken at the moment the row is written
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()

        # add to database
        add_scraped_promotions_to_database(scraped_promotion.provider, scraped_promotion.device_name,
                                           scraped_promotion.device_storage, scraped_promotion.promo_location,
                                           scraped_promotion.promo_text, scraped_promotion.url,
                                           scraped_promotion.date, scraped_promotion.time)
Пример #9
0
def cri_scrape_deals_page():
    """Scrape promotion text from the Cricket Wireless deals page.

    Opens https://www.cricketwireless.com/current-phone-and-plan-deals in
    headless Chrome, then, for each of these ``div`` classes in order,
    formats the text of every match with ``format_promo_text``, prints it and
    passes it to ``add_scraped_promotions_to_database``:

    1. ``hero-promo hover-item`` (big green promo banner);
    2. ``promo-content-wrapper`` (other main promotions);
    3. ``col-xs-9`` (promos at the bottom of the page).

    The browser is always shut down with ``driver.quit()``, even when
    scraping raises.
    """
    # headless Chrome
    chrome_options = Options()
    chrome_options.add_argument("--headless")
    chrome_options.add_argument("--window-size=1920x1080")
    chrome_driver = os.path.join(os.getcwd(), "chromedriver.exe")
    driver = webdriver.Chrome(chrome_options=chrome_options,
                              executable_path=chrome_driver)

    try:
        driver.implicitly_wait(5)

        # go to website and give the page time to render
        driver.get('https://www.cricketwireless.com/current-phone-and-plan-deals')
        time.sleep(3)
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # make object
        scraped_promotion = ScrapedPromotion()

        # set hardcoded variables
        scraped_promotion.provider = 'cricket'
        scraped_promotion.date = datetime.date.today()
        scraped_promotion.time = datetime.datetime.now().time()
        scraped_promotion.promo_location = 'deals page'
        scraped_promotion.device_name = 'N/A'
        scraped_promotion.device_storage = '0'
        scraped_promotion.url = driver.current_url

        # page sections to harvest, in the order they are recorded
        promo_div_classes = (
            'hero-promo hover-item',   # big green promo banner
            'promo-content-wrapper',   # other main promotions
            'col-xs-9',                # promos at the bottom of the page
        )

        for div_class in promo_div_classes:
            for div in soup.findAll('div', class_=div_class):
                scraped_promotion.promo_text = format_promo_text(div.text)
                print(scraped_promotion.provider,
                      scraped_promotion.device_name,
                      scraped_promotion.device_storage,
                      scraped_promotion.promo_location,
                      scraped_promotion.promo_text, scraped_promotion.url,
                      scraped_promotion.date, scraped_promotion.time)
                add_scraped_promotions_to_database(
                    scraped_promotion.provider, scraped_promotion.device_name,
                    scraped_promotion.device_storage,
                    scraped_promotion.promo_location,
                    scraped_promotion.promo_text, scraped_promotion.url,
                    scraped_promotion.date, scraped_promotion.time)
    finally:
        # quit() rather than close(): close() only closes the current window
        driver.quit()