示例#1
0
    def test_mercado_libre_conection_status(self):
        """Mercado Libre's adapted search URL must report status 200."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Mercado_Libre.adapt_url(
            Mercado_Libre, search_terms, country_code)

        # The value returned with just_status=True is compared against 200.
        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
示例#2
0
    def test_Ebay_conection_status(self):
        """Ebay's adapted search URL must report status 200."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Ebay.adapt_url(Ebay, search_terms, country_code)

        # The value returned with just_status=True is compared against 200.
        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
示例#3
0
    def test_there_is_soup(self):
        """extract_soup must hand back something for Amazon's search URL."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        # This revision passes the country before the search terms.
        search_url = Amazon.adapt_url(Amazon, country_code, search_terms)

        soup = extract_soup(search_url, 1, just_soup=True)
        self.assertIsNotNone(soup)
示例#4
0
    def test_there_is_soup(self):
        """extract_soup must hand back something for Mercado Libre's URL."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        # This revision passes the country before the search terms.
        search_url = Mercado_Libre.adapt_url(
            Mercado_Libre, country_code, search_terms)

        soup = extract_soup(search_url, 1, just_soup=True)
        self.assertIsNotNone(soup)
示例#5
0
    def test_conection_status(self):
        """Amazon's adapted search URL must report status 200."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Amazon.adapt_url(Amazon, search_terms, country_code)

        # The value returned with just_status=True is compared against 200.
        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
示例#6
0
    def test_there_is_soup(self):
        """extract_soup must hand back something for Ebay's search URL."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Ebay.adapt_url(Ebay, search_terms, country_code)

        soup = extract_soup(search_url, 1, just_soup=True)
        self.assertIsNotNone(soup)
示例#7
0
    def test_conection_status(self):
        """Build Amazon's search URL by hand and expect status 200."""
        search_terms = 'audifonos inalambricos'
        encoded_terms = search_terms.replace(' ', Amazon.space_replacer)

        # Fill the two placeholders of the URL template one after the other:
        # first with the '.mx' literal, then with the encoded search terms.
        search_url = (
            Amazon.url
            .replace(Amazon.url_replacers[0], '.mx')
            .replace(Amazon.url_replacers[1], encoded_terms)
        )

        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
示例#8
0
    def test_mercado_libre_conection_status(self):
        """Build Mercado Libre's search URL by hand and expect status 200."""
        search_terms = 'audifonos inalambricos'
        encoded_terms = search_terms.replace(
            ' ', Mercado_Libre.space_replacer)

        # Fill the two placeholders of the URL template one after the other:
        # first with the '.mx' literal, then with the encoded search terms.
        search_url = (
            Mercado_Libre.url
            .replace(Mercado_Libre.url_replacers[0], '.mx')
            .replace(Mercado_Libre.url_replacers[1], encoded_terms)
        )

        status_code = extract_soup(search_url, 0, just_status=True)
        self.assertEqual(status_code, 200)
示例#9
0
    def test_get_brute_info_including_Nones(self):
        """search_boxes must find exactly 60 boxes in Amazon's results."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        # This revision passes the country before the search terms.
        search_url = Amazon.adapt_url(Amazon, country_code, search_terms)
        soup = extract_soup(search_url, 1, just_soup=True)

        # Step under test: split the page into product boxes.
        boxes = search_boxes(soup, Amazon.boxes)
        box_count = len(boxes)
        self.assertEqual(box_count, 60)
示例#10
0
    def test_conection_status(self):
        """Each entry of ``self.Pages`` must report status 200."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        for page in self.Pages:
            search_url = page.adapt_url(page, search_terms, country_code)

            # Step under test: ask extract_soup for the status of this page.
            status_code = extract_soup(search_url, 0, just_status=True)
            self.assertEqual(status_code, 200)
示例#11
0
    def test_extract_soup(self):
        """extract_soup must return something for every page in self.Pages."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        for page in self.Pages:
            search_url = page.adapt_url(page, search_terms, country_code)

            # Step under test: fetch the soup of this page.
            soup = extract_soup(search_url, 1, just_soup=True)
            self.assertIsNotNone(soup)
示例#12
0
    def test_get_brute_info_including_Nones(self):
        """search_boxes must find at least one product box on Ebay."""
        user_request = 'audifonos inalambricos'
        country = 'mx'
        ebay_url = Ebay.adapt_url(Ebay, user_request, country)
        ebay_soup = extract_soup(ebay_url, 1, just_soup=True)

        # Step under test: split the page into product boxes.
        ebay_boxes = search_boxes(ebay_soup, Ebay.boxes)

        # len() never returns None, so assertIsNotNone(len(...)) could not
        # fail on an empty result; require a non-empty collection instead.
        self.assertGreater(len(ebay_boxes), 0)
示例#13
0
    def test_get_brute_info_without_losses(self):
        """get_brute_info must yield one stars entry per Amazon box."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        # This revision passes the country before the search terms.
        search_url = Amazon.adapt_url(Amazon, country_code, search_terms)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Amazon.boxes)

        # Step under test: the raw stars info must keep the same length
        # as the list of boxes it was extracted from.
        raw_stars = get_brute_info(boxes, Amazon.stars)
        self.assertEqual(len(boxes), len(raw_stars))
示例#14
0
 def test_get_product_boxes(self):
     """search_boxes must find at least one box on every page."""
     user_request = 'audifonos inalambricos'
     country = 'mx'
     for Page in self.Pages:
         page_url = Page.adapt_url(Page, user_request, country)
         page_soup = extract_soup(page_url, 1, just_soup=True)

         # Step under test: split the page into product boxes.
         page_boxes = search_boxes(page_soup, Page.boxes)

         # len() never returns None, so assertIsNotNone(len(...)) could not
         # fail on an empty result; require a non-empty collection instead.
         self.assertGreater(len(page_boxes), 0)
示例#15
0
    def test_get_brute_info_including_Nones(self):
        """search_boxes must find at least one product box on Mercado Libre."""
        user_request = 'audifonos inalambricos'
        country = 'mx'
        ml_url = Mercado_Libre.adapt_url(Mercado_Libre, user_request, country)
        ml_soup = extract_soup(ml_url, 1, just_soup=True)

        # Step under test: split the page into product boxes.
        ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)

        # len() never returns None, so assertIsNotNone(len(...)) could not
        # fail on an empty result; require a non-empty collection instead.
        self.assertGreater(len(ml_boxes), 0)
示例#16
0
def request_products(user_request, Page, header, home=False, country='mx'):
    """Scrape one store's search results for ``user_request``.

    Args:
        user_request: Search text to look for.
        Page: Store description. This function uses its ``adapt_url``,
            ``boxes``, ``name`` and ``index`` attributes and passes the
            object on to the ``get_*`` helpers.
        header: Passed to ``extract_soup`` on every attempt.
        home: Forwarded to ``get_price``.
        country: Forwarded to ``adapt_url`` and ``get_price``.

    Returns:
        dict: When the final status is 200, the keys ``names``, ``images``,
        ``urls``, ``prices`` and ``status``. For any other final status,
        those same keys with empty lists plus ``store``, ``idx`` and
        ``product``.
    """
    page_url = Page.adapt_url(Page, user_request, country)

    # All the HTML of the page
    page_soup, status = extract_soup(page_url, header)

    # Wait until the info is received or the request is denied. The retry
    # runs BEFORE the status is dispatched: previously a request that first
    # got a 503 left the function through the end of the if/elif chain and
    # returned None, even when a later attempt succeeded. The retry also
    # reuses the same header as the first attempt.
    # NOTE: there is no upper bound on the number of retries.
    while status == 503:
        time.sleep(1)
        page_soup, status = extract_soup(page_url, header)

    if status == 200:
        # HTML divided by products, and stored as elements of an array
        page_boxes = search_boxes(page_soup, Page.boxes)
        page_products = {}

        # Obtain the info of the product
        page_products['names'] = get_names(page_boxes, Page)
        page_products['images'] = get_images(page_boxes, Page)
        page_products['urls'] = get_products_urls(page_boxes, Page)
        page_products['prices'] = get_price(country, page_boxes, Page, home)
        page_products['status'] = status

        return page_products

    page_products = {}
    # With empty values (not None) the script knows that this result won't
    # be uploaded. A single None would instead be read as a product box
    # without info, something that does occur with Amazon.
    page_products['store'] = Page.name
    page_products['idx'] = Page.index
    page_products['product'] = user_request
    page_products['names'] = []
    page_products['images'] = []
    page_products['urls'] = []
    page_products['prices'] = []
    page_products['status'] = status

    return page_products
示例#17
0
    def test_products_info_getters(self):
        """Names, images and URLs from Ebay must each match the box count."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Ebay.adapt_url(Ebay, search_terms, country_code)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Ebay.boxes)

        # The price getter is not part of this comparison.
        getters = {
            'ebay_names': len(get_names(boxes, Ebay.name_and_images)),
            'ebay_images': len(get_images(boxes, Ebay)),
            'ebay_urls': len(get_products_urls(boxes, Ebay)),
        }

        for count in getters.values():
            self.assertEqual(len(boxes), count)
示例#18
0
 def test_products_info_getters(self):
     """Every getter must return one entry per box, for every page."""
     search_terms = 'audifonos inalambricos'
     country_code = 'mx'
     for page in self.Pages:
         search_url = page.adapt_url(page, search_terms, country_code)
         soup = extract_soup(search_url, 1, just_soup=True)
         boxes = search_boxes(soup, page.boxes)

         # Step under test: collect the length of each getter's result,
         # calling the getters in a fixed order.
         counts = [
             len(get_names(boxes, page)),
             len(get_images(boxes, page)),
             len(get_products_urls(boxes, page)),
             len(get_price(country_code, boxes, page)),
         ]

         for count in counts:
             self.assertEqual(len(boxes), count)
示例#19
0
    def test_cheapest_gets_info(self):
        """No field of Mercado Libre's cheapest product may be None."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'

        search_url = Mercado_Libre.adapt_url(
            Mercado_Libre, search_terms, country_code)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Mercado_Libre.boxes)
        prices = get_price(country_code, boxes, Mercado_Libre.price)

        # cheapest() is unpacked into two values when called with
        # position_and_price=True.
        best = cheapest(prices, position_and_price=True)
        best_idx, best_price = best
        product = get_cheapest(
            best_idx, boxes, best_price, country_code, Mercado_Libre)

        for key in product:
            self.assertIsNotNone(product[key])
示例#20
0
    def test_products_info_getters(self):
        """Every Mercado Libre getter must return one entry per box."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'
        search_url = Mercado_Libre.adapt_url(
            Mercado_Libre, search_terms, country_code)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Mercado_Libre.boxes)

        # Length of each getter's result, keyed by what was extracted.
        names_count = len(get_names(boxes, Mercado_Libre.name_and_images))
        images_count = len(get_images(boxes, Mercado_Libre))
        urls_count = len(get_products_urls(boxes, Mercado_Libre))
        price_count = len(get_price(country_code, boxes, Mercado_Libre.price))
        getters = {
            'ml_names': names_count,
            'ml_images': images_count,
            'ml_urls': urls_count,
            'ml_price': price_count,
        }

        for count in getters.values():
            self.assertEqual(len(boxes), count)
示例#21
0
    def test_cheapest_gets_info(self):
        """No field of Ebay's cheapest product may be None."""
        search_terms = 'audifonos inalambricos'
        country_code = 'mx'

        search_url = Ebay.adapt_url(Ebay, search_terms, country_code)
        soup = extract_soup(search_url, 1, just_soup=True)
        boxes = search_boxes(soup, Ebay.boxes)
        prices = get_price(country_code, boxes, Ebay.price)

        # cheapest() is unpacked into two values when called with
        # position_and_price=True.
        best = cheapest(prices, position_and_price=True)
        best_idx, best_price = best
        product = get_cheapest(best_idx, boxes, best_price, country_code, Ebay)

        for key in product:
            self.assertIsNotNone(product[key])
示例#22
0
    def test_products_info_getters(self):
        """Every Amazon getter must return one entry per product box."""
        user_request = 'audifonos inalambricos'
        country = 'mx'
        # This revision passes the country before the search terms.
        amazon_url = Amazon.adapt_url(Amazon, country, user_request)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

        amazon_names = len(get_names(amazon_boxes, Amazon.name_and_images))
        amazon_images = len(get_images(amazon_boxes, Amazon.name_and_images))
        amazon_urls = len(get_products_urls(amazon_boxes, Amazon.product_urls))
        amazon_price = len(get_price(country, amazon_boxes, Amazon.price))
        amazon_ids = len(amazon_products_id(amazon_boxes))
        amazon_reviews = len(get_reviews(country, amazon_boxes, Amazon.reviews))
        amazon_stars = len(get_stars(country, amazon_boxes, Amazon.stars))

        trials = [
            amazon_names,
            amazon_images,
            amazon_urls,
            amazon_price,
            amazon_ids,
            amazon_reviews,
            amazon_stars,
        ]
        for test in trials:
            # assertEquals is a deprecated alias that was removed in
            # Python 3.12; assertEqual is the supported spelling.
            self.assertEqual(len(amazon_boxes), test)
示例#23
0
def scraper(Page, user_request, country):
    """Return the cheapest product found on ``Page`` for ``user_request``.

    The search URL is adapted, fetched and split into product boxes; the
    prices read from those boxes select which product ``get_cheapest``
    is asked to build.

    Args:
        Page: Store description providing ``adapt_url``, ``boxes`` and
            ``price``.
        user_request: Search text.
        country: Country code forwarded to the helpers.

    Returns:
        Whatever ``get_cheapest`` returns (described in the original
        comments as the cheapest product, as a dictionary).
    """
    # Adapt the url (this revision passes the country before the request)
    search_url = Page.adapt_url(Page, country, user_request)

    # All the HTML of the page, then divided by products
    page_soup = extract_soup(search_url, 1, just_soup=True)
    product_boxes = search_boxes(page_soup, Page.boxes)

    # From the boxes, obtain the prices and pick the cheapest one
    all_prices = get_price(country, product_boxes, Page.price)
    best_idx, best_price = cheapest(all_prices, position_and_price=True)

    return get_cheapest(best_idx, product_boxes, best_price, country, Page)
示例#24
0
def scraper(user_request, country):
    """Return the cheapest Mercado Libre product for ``user_request``.

    Args:
        user_request: Search text.
        country: Country code forwarded to the helpers.

    Returns:
        Whatever ``get_cheapest`` returns (described in the original
        comments as the cheapest product, as a dictionary).
    """
    # Adapt the url (this revision passes the country before the request)
    search_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

    # All the HTML of the page, then divided by products
    page_soup = extract_soup(search_url, 1, just_soup=True)
    product_boxes = search_boxes(page_soup, Mercado_Libre.boxes)

    # From the boxes, obtain the prices and pick the cheapest one
    all_prices = get_price(country, product_boxes, Mercado_Libre.price)
    best_idx, best_price = cheapest(all_prices, position_and_price=True)

    return get_cheapest(
        best_idx, product_boxes, best_price, country, Mercado_Libre)
示例#25
0
def scraper(user_request, country):
    """Return the cheapest Amazon product for ``user_request``.

    Args:
        user_request: Search text.
        country: Country code forwarded to the helpers.

    Returns:
        Whatever ``get_cheapest`` returns (described in the original
        comments as the cheapest product, as a dictionary).
    """
    # Adapt the url
    search_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page, then divided by products
    page_soup = extract_soup(search_url, 1, just_soup=True)
    product_boxes = search_boxes(page_soup, Amazon.boxes)

    # From the boxes, obtain the prices and pick the cheapest one
    all_prices = get_price(country, product_boxes, Amazon.price)
    best_idx, best_price = cheapest(all_prices, position_and_price=True)

    return get_cheapest(best_idx, product_boxes, best_price, country, Amazon)
示例#26
0
    #Obtain the cheapest from prices and then, you obtain the cheapest product as a dictionary
    cheapest_idx, cheapest_price = cheapest(prices, position_and_price=True)
    cheapest_product_dictionary = get_cheapest(cheapest_idx, boxes,
                                               cheapest_price, country, Page)

    return cheapest_product_dictionary


if __name__ == "__main__":
    # Manual run: scrape Ebay for a fixed request and locate the cheapest
    # price among the results.
    user_request = 'audifonos inalambricos'
    country = 'mx'
    ebay_url = Ebay.adapt_url(Ebay, user_request, country)

    # Fetch the whole page, then split it into one element per product.
    ebay_soup = extract_soup(ebay_url, 1, just_soup=True)
    ebay_boxes = search_boxes(ebay_soup, Ebay.boxes)

    # One list per kind of information; 'images' holds the image links.
    ebay_products = {
        'names': get_names(ebay_boxes, Ebay.name_and_images),
        'images': get_images(ebay_boxes, Ebay),
        'urls': get_products_urls(ebay_boxes, Ebay),
        'prices': get_price(country, ebay_boxes, Ebay.price),
    }

    cheapest_idx = cheapest(ebay_products['prices'])
示例#27
0
    cheapest_amazon_product_dictionary = get_cheapest(amazon_cheapest_idx,
                                                      amazon_boxes,
                                                      amazon_cheapest_price,
                                                      country, Amazon)

    return cheapest_amazon_product_dictionary


if __name__ == "__main__":
    # Manual run: scrape Amazon for a fixed request and collect the
    # information of every product box.
    user_request = 'audifonos inalambricos'
    country = 'mx'
    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)
    amazon_products = {}

    amazon_products['name'] = get_names(amazon_boxes, Amazon.name_and_images)
    # Amazon's images source (link)
    amazon_products['image'] = get_images(amazon_boxes, Amazon)

    amazon_products['url'] = get_products_urls(amazon_boxes, Amazon)
    # Just Amazon's products id. Is used as a url generator:
    # amazon's url + domain + "/dp/" + product_id
    # amazon_products['id']= amazon_products_id(amazon_boxes)
    # Just stars as float
    amazon_products['star'] = get_stars(country, amazon_boxes, Amazon.stars)
示例#28
0
from scrape_data import Mercado_Libre, Products
from scrape_funcs import extract_soup, search_boxes, get_brute_info
from data_filters import get_names, get_images, get_products_urls, get_price

from general_funcs import cheapest, get_cheapest

# Manual run: scrape Mercado Libre for a fixed request and print every
# field of the cheapest product found.
user_request = 'audifonos inalambricos'
country = 'mx'
# This revision passes the country before the search terms.
ml_url = Mercado_Libre.adapt_url(Mercado_Libre, country, user_request)

# All the HTML of the page
ml_soup = extract_soup(ml_url, 1, just_soup=True)

# HTML divided by products, and stored as elements of an array
ml_boxes = search_boxes(ml_soup, Mercado_Libre.boxes)
ml_products = {}

ml_products['names'] = get_names(ml_boxes, Mercado_Libre.name_and_images)

# Mercado_Libre's images source (link)
ml_products['images'] = get_images(ml_boxes, Mercado_Libre.name_and_images)

ml_products['urls'] = get_products_urls(ml_boxes, Mercado_Libre.product_urls)

ml_products['prices'] = get_price(country, ml_boxes, Mercado_Libre.price)

# Bind the result to its own name: assigning it to ``cheapest`` would
# overwrite the imported ``cheapest`` function for the rest of the module.
cheapest_idx = cheapest(ml_products['prices'])
cheapest_ml_product = get_cheapest(cheapest_idx, ml_products)
for key in cheapest_ml_product:
    print(key, ':', cheapest_ml_product[key])