import logging
import time
from itertools import chain

from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.remote.webdriver import WebDriver


def parse_adidas_page(driver: WebDriver, url):
    """Walk every page of an Adidas listing and parse its product cards."""
    driver.get(get_url_largest(url))
    time.sleep(3)
    test_adidas_down(driver, 20)
    logging.info("Scrolled down")
    url = driver.current_url
    logging.info("Got url")
    num_pages = get_num_pages(driver)
    logging.info("Got number of pages")
    sources = [driver.page_source]
    logging.info("Got page source")
    for i in range(2, num_pages + 1):
        link = get_link_to_next_page(url, i)
        logging.info("Got link to next page")
        driver.get(link)
        logging.info("Moved to next page")
        time.sleep(3)
        test_adidas_down(driver, 20)
        logging.info("Scrolled down")
        sources.append(driver.page_source)
        logging.info("Appended source")
    driver.quit()  # quit() closes every window, so the extra close() was redundant
    logging.info("Closed driver")
    logging.info("Start make_soup")
    product_list = [
        make_soup(source).find_all("div", class_="innercard") for source in sources
    ]
    logging.info("End make_soup")
    logging.info("Start parsing all sources")
    return map(parse_product, chain(*product_list))
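# make_soup is used throughout this module but not defined in this section; a
# minimal sketch, assuming it is a thin BeautifulSoup wrapper with a fixed parser:
from bs4 import BeautifulSoup


def make_soup(source):
    """Parse raw page source into a BeautifulSoup tree."""
    return BeautifulSoup(source, "html.parser")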
def parse_nike_page(driver, url):
    """Parse all product cards from a Nike listing page."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(5)
    get_down_topman(driver, 50)  # reuses the Topman scroll helper
    source = driver.page_source
    product_list = make_soup(source).find_all("div", class_="grid-item")
    return map(parse_product, product_list)
def parse_guess_page(driver, url):
    """Parse all product cards from a Guess listing page."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(15)
    driver.execute_script("window.scrollTo(0, 40000)")  # trigger lazy loading
    time.sleep(5)
    source = driver.page_source
    product_list = make_soup(source).find_all("li", class_="product-bucket")
    return map(parse_product, product_list)
def parse_asos_page(driver, url):
    """Parse all product cards from an ASOS listing page."""
    driver.get(url)
    driver.implicitly_wait(10)
    time.sleep(7)
    load_more(driver)
    driver.execute_script("window.scrollTo(0, 40000)")
    time.sleep(10)
    source = driver.page_source
    product_list = make_soup(source).find_all(class_="_2oHs74P")
    return map(parse_product, product_list)
def parse_all_saints(driver: WebDriver, url):
    """Parse all product cards from an AllSaints listing page."""
    driver.get(get_url_view(url))
    driver.implicitly_wait(30)
    time.sleep(10)
    try:
        driver.execute_script("window.scrollTo(0, 40000)")
        time.sleep(10)
    except NoSuchElementException:
        pass
    product_list = make_soup(driver.page_source).find_all("div", class_="product")
    return map(parse_product, product_list)
def parse_topman_page(driver: WebDriver, url):
    """Scroll a Topman listing in steps, snapshotting the source as items lazy-load."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(5)
    set_grid_to_4(driver)
    sources = [driver.page_source]
    position = 700
    while not is_bottom(driver):
        driver.execute_script("window.scrollTo(0, {})".format(position))
        sources.append(driver.page_source)
        time.sleep(0.3)
        position += 700
    product_list = (
        make_soup(source).find_all("div", class_="product") for source in sources
    )
    items = []
    for products in product_list:
        items.extend(filter(bool, map(parse_product, products)))
    return items
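# is_bottom is assumed to check whether the viewport has reached the bottom of
# the document; a minimal sketch of that check via execute_script:
def is_bottom(driver):
    """Return True once the window has scrolled to the bottom of the page."""
    return driver.execute_script(
        "return window.innerHeight + window.pageYOffset"
        " >= document.body.scrollHeight;"
    )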
def parse_tommy_page(driver, url):
    """Walk every page of a Tommy Hilfiger listing and parse its product cards."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(5)
    close_modal(driver)
    get_down_tommy(driver)
    url = driver.current_url
    num_pages = get_num_pages(driver)
    sources = [driver.page_source]
    for i in range(2, num_pages + 1):
        driver.get(get_link_to_next_page(url, i))
        get_down_tommy(driver)
        sources.append(driver.page_source)
    product_list = [
        make_soup(source).find_all("div", class_="productCell") for source in sources
    ]
    return map(parse_product, chain(*product_list))
def parse_zara_page(driver, url):
    """Scroll a Zara listing in fixed steps, snapshotting the source as it loads."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(5)
    close_modal(driver)
    set_grid_to_4(driver)
    sources = [driver.page_source]
    position = 700
    for _ in range(40):
        driver.execute_script("window.scrollTo(0, {})".format(position))
        sources.append(driver.page_source)
        time.sleep(0.3)
        position += 700
    product_list = [
        make_soup(source).find_all("li", class_="product") for source in sources
    ]
    return filter(bool, map(parse_product, chain(*product_list)))
def parse_reebok_page(driver, url):
    """Walk every page of a Reebok listing and parse its product cards."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(5)
    set_grid_to_largest(driver)
    url = driver.current_url
    num_pages = get_num_pages(driver)
    get_down_adidas(driver, 25)  # Reebok listings share the Adidas page layout
    sources = [driver.page_source]
    for i in range(2, num_pages + 1):
        driver.get(get_link_to_next_page(url, i))
        time.sleep(2)
        get_down_adidas(driver, 25)
        sources.append(driver.page_source)
    product_list = [
        make_soup(source).find_all("div", class_="innercard") for source in sources
    ]
    return map(parse_product, chain(*product_list))
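# get_link_to_next_page is assumed to rewrite the listing URL with a page query
# parameter; a hypothetical sketch (the real parameter name may differ per site):
from urllib.parse import parse_qs, urlencode, urlparse, urlunparse


def get_link_to_next_page(url, page):
    """Return the listing URL pointed at the given page number."""
    parts = urlparse(url)
    query = parse_qs(parts.query)
    query["page"] = [str(page)]  # assumed parameter name
    return urlunparse(parts._replace(query=urlencode(query, doseq=True)))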
def parse_all_ck(driver, url):
    """Walk every page of a Calvin Klein listing and parse its product cards."""
    driver.get(url)
    driver.implicitly_wait(15)
    time.sleep(10)
    close_modal(driver)
    driver.execute_script("window.scrollTo(0, 60000)")
    num_pages = get_num_pages(driver)
    logging.info(f"Got {num_pages} pages")
    time.sleep(10)
    sources = [driver.page_source]
    for i in range(2, num_pages + 1):
        to_next_page(driver)
        driver.implicitly_wait(15)
        time.sleep(10)
        driver.execute_script("window.scrollTo(0, 60000)")
        time.sleep(5)
        sources.append(driver.page_source)
    product_list = [
        make_soup(source).find_all("div", class_="productCell") for source in sources
    ]
    return filter(bool, map(parse_product, chain(*product_list)))
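# to_next_page is assumed to click the listing's "next page" control; a
# hypothetical sketch (the CSS selector here is an assumption, not the real one):
from selenium.webdriver.common.by import By


def to_next_page(driver):
    """Advance the listing to its next page by clicking the pagination control."""
    driver.find_element(By.CSS_SELECTOR, "a.next-page").click()  # hypothetical selector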
def parse_source(source):
    """Extract product cards from already-fetched page source."""
    return make_soup(source).find_all("div", class_="each-product")
def parse_ck_page(source):
    """Parse product cards from already-fetched Calvin Klein page source."""
    product_list = make_soup(source).find_all("div", class_="productCell")
    return filter(bool, map(parse_product, product_list))
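# A usage sketch for the parsers above, assuming a local headless Chrome setup
# (the driver options and listing URL are illustrative, not from the original):
if __name__ == "__main__":
    from selenium import webdriver

    logging.basicConfig(level=logging.INFO)
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")
    driver = webdriver.Chrome(options=options)
    try:
        listing_url = "https://www.nike.com/w/mens-shoes"  # hypothetical listing URL
        for product in parse_nike_page(driver, listing_url):
            print(product)
    finally:
        driver.quit()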