Exemplo n.º 1
0
    def test_adapt_url(self):
        # adapt_url must turn the request/country pair into this exact
        # search URL (Mexican domain, words joined with '+').
        expected_url = 'https://www.amazon.com.mx/s?k=audifonos+inalambricos'

        built_url = Amazon.adapt_url(Amazon, 'audifonos inalambricos', 'mx')

        self.assertEqual(built_url, expected_url)
Exemplo n.º 2
0
    def test_there_is_soup(self):
        # extract_soup(..., just_soup=True) must hand back something other
        # than None for the adapted search URL.
        search_url = Amazon.adapt_url(Amazon, 'audifonos inalambricos', 'mx')

        self.assertIsNotNone(extract_soup(search_url, 1, just_soup=True))
Exemplo n.º 3
0
    def test_conection_status(self):
        # With just_status=True the value returned by extract_soup is
        # compared against 200.
        search_url = Amazon.adapt_url(Amazon, 'audifonos inalambricos', 'mx')

        status_code = extract_soup(search_url, 0, just_status=True)

        self.assertEqual(status_code, 200)
Exemplo n.º 4
0
    def test_get_brute_info_including_Nones(self):
        # Build the soup for the adapted search URL.
        search_url = Amazon.adapt_url(Amazon, 'audifonos inalambricos', 'mx')
        page_soup = extract_soup(search_url, 1, just_soup=True)

        # New test: search_boxes is expected to return exactly 60 boxes.
        product_boxes = search_boxes(page_soup, Amazon.boxes)
        box_count = len(product_boxes)
        self.assertEqual(box_count, 60)
Exemplo n.º 5
0
    def test_get_brute_info_without_losses(self):
        # Build the product boxes for the adapted search URL.
        search_url = Amazon.adapt_url(Amazon, 'audifonos inalambricos', 'mx')
        page_soup = extract_soup(search_url, 1, just_soup=True)
        product_boxes = search_boxes(page_soup, Amazon.boxes)

        # New test: get_brute_info must return one entry per box.
        raw_stars = get_brute_info(product_boxes, Amazon.stars)
        self.assertEqual(len(product_boxes), len(raw_stars))
Exemplo n.º 6
0
    def test_products_info_getters(self):
        # Every product-info getter must return exactly one entry per
        # product box. Each count is labelled with the getter that produced
        # it so a failing assertion names the getter that lost items.
        user_request = 'audifonos inalambricos'
        country = 'mx'
        amazon_url = Amazon.adapt_url(Amazon, user_request, country)
        amazon_soup = extract_soup(amazon_url, 1, just_soup=True)
        amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)

        # The list is built eagerly, so all getters still run (in the same
        # order as before) ahead of the first assertion.
        counts_by_getter = [
            ('get_names',
             len(get_names(amazon_boxes, Amazon.name_and_images))),
            ('get_images', len(get_images(amazon_boxes, Amazon))),
            ('get_products_urls',
             len(get_products_urls(amazon_boxes, Amazon))),
            ('get_price',
             len(get_price(country, amazon_boxes, Amazon.price))),
            ('get_reviews',
             len(get_reviews(country, amazon_boxes, Amazon.reviews))),
            ('get_stars',
             len(get_stars(country, amazon_boxes, Amazon.stars))),
        ]

        expected_count = len(amazon_boxes)
        for getter_name, item_count in counts_by_getter:
            self.assertEqual(
                expected_count, item_count,
                msg='{} did not return one item per product box'.format(
                    getter_name))
Exemplo n.º 7
0
def scraper(user_request, country):
    """Return the cheapest Amazon product found for ``user_request``.

    The request and country are turned into a search URL, the page is
    split into per-product boxes, prices are read from those boxes, and the
    result of ``get_cheapest`` for the lowest-priced entry is returned
    (described in the original comments as a dictionary).
    """
    # Search URL -> page soup -> one box per product.
    search_url = Amazon.adapt_url(Amazon, user_request, country)
    page_soup = extract_soup(search_url, 1, just_soup=True)
    product_boxes = search_boxes(page_soup, Amazon.boxes)

    # From this part, could get better AFTER the 4 scrapers are made.
    prices = get_price(country, product_boxes, Amazon.price)

    # Position and value of the lowest price, then the matching product.
    best_idx, best_price = cheapest(prices, position_and_price=True)
    return get_cheapest(best_idx, product_boxes, best_price, country, Amazon)
Exemplo n.º 8
0
    #Obtain the cheapest from prices and then, you obtain the cheapest product as a dictionary
    amazon_cheapest_idx, amazon_cheapest_price = cheapest(
        amazon_prices, position_and_price=True)
    cheapest_amazon_product_dictionary = get_cheapest(amazon_cheapest_idx,
                                                      amazon_boxes,
                                                      amazon_cheapest_price,
                                                      country, Amazon)

    return cheapest_amazon_product_dictionary


# Manual run: builds a dict of per-product fields for a fixed sample query.
if __name__ == "__main__":

    # Fixed sample request and country used for this manual run.
    user_request = 'audifonos inalambricos'
    country = 'mx'
    amazon_url = Amazon.adapt_url(Amazon, user_request, country)

    # All the HTML of the page
    amazon_soup = extract_soup(amazon_url, 1, just_soup=True)

    # HTML divided by products, and stored as elements of an array
    amazon_boxes = search_boxes(amazon_soup, Amazon.boxes)
    # Maps a field name ('name', 'image', 'url') to that getter's result.
    amazon_products = {}

    # NOTE: the bare triple-quoted strings below are no-op expression
    # statements used as notes; they are not docstrings.
    amazon_products['name'] = get_names(amazon_boxes, Amazon.name_and_images)
    '''Amazon's images source (link)'''
    amazon_products['image'] = get_images(amazon_boxes, Amazon)

    amazon_products['url'] = get_products_urls(amazon_boxes, Amazon)
    '''Just Amazon's products id. Is used as a url generator:
    amazon's url + domain + "/dp/" + product_id'''