Пример #1
0
    def test_should_retrieve_products_from_query(self, store, query):
        """Check that querying a store returns a list of products.

        ``store`` and ``query`` are supplied from outside this method
        (presumably by pytest parametrization or fixtures -- not visible
        here).
        """
        # Clear the database before extracting; its total is compared below.
        ProductDatabase.clear_database()

        extractor = PageExtractor(store)
        products_list = extractor.retrieve_products_from_query(query)

        assert isinstance(products_list, list)
        # Equality cannot be asserted: the result may contain repeated items
        # which were not inserted in the database.
        assert len(products_list) >= ProductDatabase.get_products_total()
Пример #2
0
    def test_should_get_info_list_about_products(self, store, query):
        """Check that product info is extracted as a list of dicts.

        ``store`` and ``query`` are supplied from outside this method
        (presumably by pytest parametrization or fixtures -- not visible
        here).
        """
        extractor = PageExtractor(store)
        parsed_html = extractor.retrieve_html_parsed_from_query(query)
        items_list = extractor.get_info_list_about_products(parsed_html)

        assert isinstance(items_list, list)
        # Fail with a readable assertion rather than an IndexError when the
        # extraction yields nothing.
        assert items_list, "expected at least one product entry"
        assert isinstance(items_list[0], dict)
Пример #3
0
 def test_should_get_info_dict_for_product(self, store, query):
     """Check that one parsed item is converted to a dict with name and link.

     ``store`` and ``query`` are supplied from outside this method
     (presumably by pytest parametrization or fixtures -- not visible
     here).
     """
     extractor = PageExtractor(store)
     parsed_html = extractor.retrieve_html_parsed_from_query(query)
     parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)

     # Validate the container before indexing into it, so a wrong or empty
     # result fails as an assertion instead of an unrelated exception.
     assert isinstance(parsed_list, element.ResultSet)
     assert parsed_list, "expected at least one parsed item"

     item = extractor.get_info_dict_for_product(parsed_list[0])
     assert isinstance(item, dict)
     assert item['name'] is not None
     assert item['link'] is not None
Пример #4
0
    def test_should_store_products_on_json(self, store, query):
        """Check that retrieved products are written to a JSON file.

        ``store`` and ``query`` are supplied from outside this method
        (presumably by pytest parametrization or fixtures -- not visible
        here).
        """
        import contextlib
        import os
        ProductDatabase.clear_database()
        filename = 'test.json'
        # Remove any file left by an earlier run; otherwise the existence
        # check below would pass even if nothing is written this time.
        with contextlib.suppress(FileNotFoundError):
            os.remove(filename)

        extractor = PageExtractor(store)
        products_list = extractor.retrieve_products_from_query(query)
        extractor.store_products_on_json(products_list, filename)

        assert os.path.exists(filename)
        # Equality cannot be asserted: the result may contain repeated items
        # which were not inserted in the database.
        assert len(products_list) >= ProductDatabase.get_products_total()
Пример #5
0
    def test_should_query_webdriver(self):
        """Check a webdriver query end to end for the 'submarino' store.

        Queries the extractor, wraps each returned item in ``Product``,
        writes the products and a price-filtered subset to JSON files, and
        asserts that the main file exists and the database is not empty.
        """
        import contextlib
        import os
        from products import Product
        ProductDatabase.clear_database()
        filename = 'test.json'
        # Remove any file left by an earlier run; otherwise the existence
        # check below would pass even if nothing is written this time.
        with contextlib.suppress(FileNotFoundError):
            os.remove(filename)

        extractor = PageExtractor('submarino')
        products_list = extractor.query_webdriver("goblet of fire")
        products = [Product(item_attrs) for item_attrs in products_list]
        extractor.store_products_on_json(products, filename)
        filtered_products = ProductDatabase.filter(price__gte=700,
                                                   price__lt=1000)
        extractor.store_products_on_json(filtered_products,
                                         'test_filtered.json')

        assert os.path.exists(filename)
        # The total cannot be compared with len(products_list): per the
        # original author's note, add_product on ProductDatabase checks
        # whether the product was already inserted.
        assert ProductDatabase.get_products_total() != 0
Пример #6
0
 def test_should_get_items_list_from_parsed_html(self, store, query):
     """Check that items are extracted from parsed HTML as a ResultSet of Tags.

     ``store`` and ``query`` are supplied from outside this method
     (presumably by pytest parametrization or fixtures -- not visible
     here).
     """
     extractor = PageExtractor(store)
     parsed_html = extractor.retrieve_html_parsed_from_query(query)
     parsed_list = extractor.get_items_list_from_parsed_html(parsed_html)
     assert isinstance(parsed_list, element.ResultSet)
     # Fail with a readable assertion rather than an IndexError when no
     # items were found.
     assert parsed_list, "expected at least one parsed item"
     assert isinstance(parsed_list[0], element.Tag)
Пример #7
0
 def test_should_retrieve_html_parsed_from_query(self, store, query):
     """Check that a query returns parsed HTML and stores it on the extractor.

     ``store`` and ``query`` are supplied from outside this method
     (presumably by pytest parametrization or fixtures -- not visible
     here).
     """
     extractor = PageExtractor(store)
     parsed_html = extractor.retrieve_html_parsed_from_query(query)
     assert isinstance(parsed_html, BeautifulSoup)
     # The returned document must also be kept on the extractor instance.
     assert extractor.parsed_html == parsed_html
Пример #8
0
 def test_should_get_search_url(self, store, query, expected_url):
     """The search URL built for ``query`` must equal ``expected_url``."""
     search_url = PageExtractor(store).get_search_url(query)
     assert search_url == expected_url
Пример #9
0
 def test_should_get_tag_and_class_for_info(self, store, item,
                                            expected_tuple):
     """The tag/class lookup for ``item`` must equal ``expected_tuple``."""
     result = PageExtractor(store).get_tag_and_class_for_info(item)
     assert result == expected_tuple
Пример #10
0
 def test_should_init(self):
     """A new extractor exposes the store id it was constructed with."""
     store_id = 'magazineluiza'
     assert PageExtractor(store_id).store_id == store_id
Пример #11
0
 def test_should_convert_BRL_currency_to_float(self, currency_str,
                                               expected_float):
     """Converting ``currency_str`` must produce ``expected_float``."""
     converted = PageExtractor.convert_BRL_currency_to_float(currency_str)
     assert converted == expected_float