Exemplo n.º 1
0
# --- Target site settings ---------------------------------------------------
# Request headers; the User-Agent value is whatever GET_UA() returns.
headers = {"User-Agent": GET_UA()}
webpage_to_extract = "https://thebox.com.pe"
categories_name = "collections"
subdirectory = "/collections"
# Forwarded to get_links() as `sw`; allowed values are "http" or "/".
starts_with = "/"
# Forwarded to link_pages(); when there is more than one entry this must be
# a list.
products_tags = [
    "grid-view-item grid-view-item--sold-out product-card",
    "grid-view-item product-card",
]

# --- Product page selectors (tag / class pairs given to product_data()) ------
name_tag = "h1"
name_class = "product-single__title"
price_tag = "span"
price_class = "price-item price-item--sale"
description_tag = "div"
description_class = "product-single__description rte"

# --- Scraping pipeline -------------------------------------------------------
# NOTE(review): `baseurl` is not assigned anywhere in this excerpt; confirm it
# is defined elsewhere (it may have been meant to be `webpage_to_extract`).
web_scrapping = Scrapper(baseurl, headers)

# Each step feeds its result into the next call.
categories = web_scrapping.get_links(
    webpage_to_extract,
    categories_name,
    comp=subdirectory,
    sw=starts_with,
)
pages_per_cat = web_scrapping.link_pages(categories, products_tags)
products_list = web_scrapping.get_products(pages_per_cat, "products")
data = web_scrapping.product_data(
    products_list,
    name_tag,
    name_class,
    price_tag,
    price_class,
    description_tag,
    description_class,
)