def parse_entry(self, driver):
    """Return the product links found on a listing page.

    If a pager link is present it is clicked via JavaScript first, followed
    by a two second pause, before the product anchors are read.

    Returns:
        list[str]: stripped ``href`` of every ``article.js-ProductItem > a``.
    """
    pager_link = of_utils.find_element_by_css_selector(
        driver, 'div.pagination > div.pager > a')
    if pager_link:
        driver.execute_script('arguments[0].click();', pager_link)
        of_utils.sleep(2)

    anchors = of_utils.find_elements_by_css_selector(
        driver, 'article.js-ProductItem > a')
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links
def parse_entry(self, driver):
    """Scroll a listing page until it stops growing and return its links.

    Several page layouts are supported: the selectors below are tried in
    order and the first one that yields anchors is used. After each pass the
    page is scrolled down; the loop ends once a pass finds no more anchors
    than the previous one.

    Returns:
        list[str]: stripped ``href`` of the anchors found on the last pass.
    """
    driver.implicitly_wait(15)

    layout_selectors = (
        'div.productItemContainer > a',
        'li.productItemContainer > a',
        'li.productItem > a',
        'li.lookItem > a',
        'ul.lv-list-container a',
    )

    known = 0
    while True:
        for selector in layout_selectors:
            anchors = of_utils.find_elements_by_css_selector(driver, selector)
            if anchors:
                break

        if len(anchors) <= known:
            break

        known = len(anchors)
        driver.execute_script(
            'window.scrollBy(0, document.body.scrollHeight);')
        of_utils.sleep(4)

    return [anchor.get_attribute('href').strip() for anchor in anchors]
def parse_product(self, driver):
    """Scrape title, code, images and description from a product page.

    Only the title is mandatory. Every other node is optional and is skipped
    when the page does not contain it, instead of failing with an
    ``AttributeError`` on ``None``.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``,
        ``images`` and ``detail`` set, plus ``code`` when
        ``span.modelName`` is present.

    Raises:
        Exception: if the title element is not found.
    """
    of_utils.sleep(5)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    element = of_utils.find_element_by_css_selector(
        driver,
        'div.breadcrumbLeaf > p.attributesUpdater.Title > span.value')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # price_cny N/A

    # images: the URL comes from the data-origin attribute, with the '_10_'
    # marker swapped for '_20_'. Thumbnails without the attribute are skipped.
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.item-alternativeImages-shots > ul > li > img')
    images = []
    for element in elements:
        origin = element.get_attribute('data-origin')
        if origin:
            images.append(origin.strip().replace('_10_', '_20_'))
    product['images'] = ';'.join(images)

    # code
    element = of_utils.find_element_by_css_selector(driver, 'span.modelName')
    if element:
        product['code'] = element.get_attribute('innerHTML').strip()

    # detail: click the accordion toggle (when present) before reading the
    # description list items.
    btn = of_utils.find_element_by_css_selector(
        driver, 'ul.itemDetails-info-accordion > li > h2 > div.plusIcon')
    if btn:
        driver.execute_script('arguments[0].click();', btn)
    elements = of_utils.find_elements_by_css_selector(
        driver, 'div.itemdescription > ul > li')
    texts = [
        element.get_attribute('innerHTML').strip() for element in elements
    ]
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Scrape title, euro price and gallery images from a product page.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``price_euro_de`` when a price label exists.

    Raises:
        Exception: if no ``.product-name`` element is found.
    """
    of_utils.sleep(2)
    result = of_spider.empty_product.copy()

    # title (mandatory)
    name_node = of_utils.find_element_by_css_selector(driver, ".product-name")
    if not name_node:
        raise Exception('Title not found')
    result['title'] = name_node.text.strip()

    # code N/A

    # price: drop the euro sign and comma separators, then truncate to int
    price_node = of_utils.find_element_by_css_selector(
        driver, 'label.regular-price>span')
    if price_node:
        amount = price_node.text.strip().replace('€', '').replace(',', '')
        result['price_euro_de'] = int(float(amount))

    # images, de-duplicated while keeping first-seen order
    thumbs = of_utils.find_elements_by_css_selector(
        driver, '.product-image-gallery > a > img')
    sources = []
    for thumb in thumbs:
        sources.append(thumb.get_attribute('src').strip())
    result['images'] = ';'.join(dict.fromkeys(sources))

    # detail N/A
    return result
def parse_product(self, driver):
    """Scrape title, code, CNY price, images and description.

    Only the title is mandatory. The description block and individual image
    ``src`` attributes are treated as optional, like the code and price
    nodes, so a page without them no longer fails with an
    ``AttributeError`` on ``None``.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with the fields that
        could be read from the page.

    Raises:
        Exception: if ``h1#curr_skuName`` is not found.
    """
    of_utils.sleep(5)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    element = of_utils.find_element_by_css_selector(driver, 'h1#curr_skuName')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code: keep whatever follows the last ':'
    element = of_utils.find_element_by_css_selector(driver, 'p.pronumber')
    if element:
        product['code'] = element.text.split(':')[-1].strip()

    # price_cny
    element = of_utils.find_element_by_css_selector(driver, 'span.skuPrice')
    if not element:
        element = of_utils.find_element_by_css_selector(
            driver, 'span.price#skuPrice')
    if element:
        # Drop the leading three characters (the "RMB" prefix).
        price_text = element.text.strip()[3:].strip().replace(',', '')
        product['price_cny'] = int(float(price_text))

    # images: strip the query string from each src
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul#fullscreen_swatchpro_small > li > img')
    images = []
    for element in elements:
        src = element.get_attribute('src')
        if src:
            images.append(src.split('?')[0].strip())
    product['images'] = ';'.join(images)

    # detail (optional)
    element = of_utils.find_element_by_css_selector(driver, 'div.description')
    if element:
        product['detail'] = element.text.strip()
    return product
def parse_product(self, driver):
    """Scrape title, SKU, CNY price, images and description paragraphs.

    If a price-reveal control is present it is clicked first, followed by a
    two second pause, before the price is read.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with the scraped fields.

    Raises:
        Exception: if ``span.product.attribute.name`` is not found.
    """
    result = of_spider.empty_product.copy()

    # title (mandatory)
    name_node = of_utils.find_element_by_css_selector(
        driver, 'span.product.attribute.name')
    if not name_node:
        raise Exception('Title not found')
    result['title'] = name_node.text.strip()

    # code
    sku_node = of_utils.find_element_by_css_selector(
        driver, 'span.value[itemprop=sku]')
    if sku_node:
        result['code'] = sku_node.text.strip()

    # price_cny
    reveal = of_utils.find_element_by_css_selector(
        driver, 'span.product-price-reveal__action__show')
    if reveal:
        driver.execute_script('arguments[0].click();', reveal)
        of_utils.sleep(2)
    price_node = of_utils.find_element_by_css_selector(driver, 'span.price')
    if price_node:
        # Drop the leading currency symbol and the comma separators.
        digits = price_node.text.strip()[1:].strip().replace(',', '')
        result['price_cny'] = int(float(digits))

    # images
    frames = of_utils.find_elements_by_css_selector(
        driver, 'div.fotorama__stage__shaft > div > img')
    sources = []
    for frame in frames:
        sources.append(frame.get_attribute('src').strip())
    result['images'] = ';'.join(sources)

    # detail: one line per description paragraph (raw innerHTML)
    paragraphs = of_utils.find_elements_by_css_selector(
        driver, 'div.product-info-details-content > div.value > p')
    result['detail'] = '\n'.join(
        paragraph.get_attribute('innerHTML').strip()
        for paragraph in paragraphs)
    return result
def parse_product(self, driver):
    """Scrape title, SKU, gallery links and description from a product page.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``,
        ``images`` and ``detail`` set, plus ``code`` when a SKU node exists.

    Raises:
        Exception: if ``h1.page-title`` is not found.
    """
    of_utils.sleep(8)  # wait for the images to load
    result = of_spider.empty_product.copy()

    # title (mandatory)
    heading = of_utils.find_element_by_css_selector(driver, 'h1.page-title')
    if not heading:
        raise Exception('Title not found')
    result['title'] = heading.text.strip()

    # code
    sku_node = of_utils.find_element_by_css_selector(
        driver, 'div.value[itemprop=sku]')
    if sku_node:
        result['code'] = sku_node.text.strip()

    # price_cny N/A

    # images: each gallery frame carries its image URL in an href attribute
    frames = of_utils.find_elements_by_css_selector(
        driver, 'div[data-gallery-role=stage-shaft] > div')
    links = []
    for frame in frames:
        links.append(frame.get_attribute('href').strip())
    result['images'] = ';'.join(links)

    # detail
    description = of_utils.find_element_by_css_selector(
        driver, 'div.description > div.value')
    result['detail'] = description.text.strip()
    return result
def parse_product(self, driver):
    """Scrape title, GBP price and images from a product page.

    A ``.glCancelBtn`` element, when present, is clicked first to dismiss it.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``price_gbp`` (kept as a string) when found.

    Raises:
        Exception: if ``.product-title`` is not found.
    """
    cancel_button = of_utils.find_element_by_css_selector(
        driver, '.glCancelBtn')
    if cancel_button:
        driver.execute_script('arguments[0].click();', cancel_button)
        of_utils.sleep(2)

    result = of_spider.empty_product.copy()

    # title (mandatory)
    title_node = of_utils.find_element_by_css_selector(
        driver, '.product-title')
    if not title_node:
        raise Exception('Title not found')
    result['title'] = title_node.text.strip()

    # code N/A

    # price_gbp: remove the pound sign, comma separators and a '.00' suffix
    price_node = of_utils.find_element_by_css_selector(
        driver, '.dark-happy-place--grey-med>span')
    if price_node:
        amount = price_node.text
        for token in ('£', ',', '.00'):
            amount = amount.replace(token, '')
        result['price_gbp'] = amount.strip()

    # images
    pictures = of_utils.find_elements_by_css_selector(
        driver, '.product__image-wrapper ul li img')
    sources = []
    for picture in pictures:
        sources.append(picture.get_attribute('data-src').strip())
    result['images'] = ';'.join(sources)

    # detail N/A
    return result
def parse_entry(self, driver):
    """Walk a paginated listing and return every product link.

    For each page the product anchors are collected. Then the hidden
    ``#currentPage`` / ``#totalPages`` inputs are compared to decide whether
    to click ``.next-page``. Paging stops when the counters are equal, when
    either counter is missing, or when there is no next-page control, so the
    links already gathered are returned rather than lost to an exception.

    Returns:
        list[str]: stripped ``href`` values, in page order.
    """
    urls = []
    while True:
        elements = of_utils.find_elements_by_css_selector(
            driver, '.list-right-content .list-item .img-box a')
        for ele in elements or []:
            href = ele.get_attribute('href')
            if href is not None:
                urls.append(href.strip())

        total_input = of_utils.find_element_by_css_selector(
            driver, '#totalPages')
        current_input = of_utils.find_element_by_css_selector(
            driver, '#currentPage')
        if not total_input or not current_input:
            # No pager on the page: nothing further to load.
            break
        total_page = total_input.get_attribute('value')
        cur_page = current_input.get_attribute('value')
        if cur_page == total_page:
            break

        btn = of_utils.find_element_by_css_selector(driver, '.next-page')
        if not btn:
            break
        driver.execute_script('arguments[0].click();', btn)
        of_utils.sleep(4)
    return urls
def parse_product(self, driver):
    """Scrape title, reference code, CNY price, images and description.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with the scraped fields.

    Raises:
        Exception: if the product name element is not found.
    """
    of_utils.sleep(12)
    result = of_spider.empty_product.copy()

    # title (mandatory)
    name_node = of_utils.find_element_by_css_selector(
        driver, 'div.product-item__detail-name > div.product-name')
    if not name_node:
        raise Exception('Title not found')
    result['title'] = name_node.text.strip()

    # code
    reference = of_utils.find_element_by_css_selector(
        driver, 'p.productreference > span.productreference-value')
    if reference:
        result['code'] = reference.text.strip()

    # price_cny: two possible locations, tried in order
    price_node = of_utils.find_element_by_css_selector(
        driver,
        'div.product-item__detail-name > div.product-price > span.price-sales')
    if not price_node:
        price_node = of_utils.find_element_by_css_selector(
            driver,
            'div.product-item__detail-price > div.product-price > span.price-sales')
    if price_node:
        # Drop the leading currency symbol and the comma separators.
        digits = price_node.text.strip()[1:].strip().replace(',', '')
        result['price_cny'] = int(float(digits))

    # images: data-zoom-src is prefixed with the site origin
    pictures = of_utils.find_elements_by_css_selector(
        driver, 'div.product-item__gallery-item-image > a > img')
    urls = []
    for picture in pictures:
        zoom_src = picture.get_attribute('data-zoom-src').strip()
        urls.append('https://www.acnestudios.com' + zoom_src)
    result['images'] = ';'.join(urls)

    # detail
    description = of_utils.find_element_by_css_selector(
        driver,
        'div.product-item__core-information > div > div.product-item__detail-info-description')
    result['detail'] = description.text.strip()
    return result
def parse_product(self, driver):
    """Scrape title, SKU, CNY price and image URLs from a product page.

    Missing title or SKU nodes yield empty strings and a missing price node
    yields ``0``; this parser does not raise for an absent title.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``, ``code``,
        ``price_cny`` and ``images`` set.
    """
    of_utils.sleep(2)
    result = of_spider.empty_product.copy()

    heading = of_utils.find_element_by_css_selector(
        driver, 'div.product-name>h1')
    if heading:
        result['title'] = heading.text.strip()
    else:
        result['title'] = ''

    sku_node = of_utils.find_element_by_css_selector(
        driver, 'div[itemprop=sku]')
    if sku_node:
        result['code'] = sku_node.text.strip()
    else:
        result['code'] = ''

    price_meta = of_utils.find_element_by_css_selector(
        driver, 'meta[itemprop=price]')
    if price_meta:
        result['price_cny'] = of_utils.convert_price(
            price_meta.get_attribute('content').strip())
    else:
        result['price_cny'] = 0

    # images: thumbnails first, stage frames as a fallback
    pictures = of_utils.find_elements_by_css_selector(
        driver, '.fotorama__thumb>img')
    if not pictures:
        pictures = of_utils.find_elements_by_css_selector(
            driver, '.fotorama__stage__frame>img')

    urls = []
    for picture in pictures:
        url = picture.get_attribute('src').strip().replace('cache', '')
        # Blank out every 32-character path segment of the URL.
        for segment in url.split('/'):
            if len(segment) == 32:
                url = url.replace(segment, '')
        urls.append(url)
    result['images'] = ';'.join(urls)
    return result
def parse_entry(self, driver):
    """Expand a lazily loaded listing and return its product links.

    Clicks the visible "load more" button once if present. It then keeps
    moving to the last product link and sending five PAGE_DOWN keys until a
    pass finds no more links than the previous one.

    Returns:
        list[str]: stripped ``href`` of the links found on the last pass.
    """
    driver.implicitly_wait(15)

    load_more = of_utils.find_element_by_xpath(
        driver, '//div[@class="loadMore isVisible"]/button')
    if load_more:
        driver.execute_script('arguments[0].click();', load_more)
        of_utils.sleep(4)

    known = 0
    while True:
        links = of_utils.find_elements_by_xpath(
            driver,
            '//a[@class="productQB__wrapperImage js-product-qb-link"]')
        if len(links) <= known:
            break
        known = len(links)

        chain = ActionChains(driver).move_to_element(links[-1])
        for _ in range(5):
            chain.send_keys(Keys.PAGE_DOWN)
        chain.perform()
        of_utils.sleep(4)

    return [link.get_attribute('href').strip() for link in links]
def parse_product(self, driver):
    """Open one product popup on a listing page and scrape it.

    The text after the last ``?`` in the current URL is parsed as an integer.
    It selects which product link to click and which ``.popup_product_<n>``
    container to read from.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``price_hkd`` (kept as a string) when found.

    Raises:
        Exception: if the popup title is not found.
    """
    anchors = of_utils.find_elements_by_css_selector(
        driver, '.product_list .product_1 a')
    index = int(driver.current_url.split('?')[-1])
    target = anchors[index]
    driver.execute_script('arguments[0].click();', target)
    of_utils.sleep(2)

    result = of_spider.empty_product.copy()
    popup = '.popup_product_%s' % index

    # title (mandatory)
    heading = of_utils.find_element_by_css_selector(
        driver, popup + ' .product_detail_content .jspPane h2')
    if not heading:
        raise Exception('Title not found')
    result['title'] = heading.text.strip()

    # code N/A

    # price_hkd: text before the first '/', without '$' and commas
    price_node = of_utils.find_element_by_css_selector(
        driver, popup + ' .product_photo h3')
    if price_node:
        first_part = price_node.text.strip().split('/')[0].strip()
        result['price_hkd'] = first_part.replace('$', '').replace(',', '')

    # images
    pictures = of_utils.find_elements_by_css_selector(
        driver, popup + ' .product_photo img')
    sources = []
    for picture in pictures:
        sources.append(picture.get_attribute('src').strip())
    result['images'] = ';'.join(sources)

    # detail N/A
    return result
def getList(q):
    """Search PubMed for ``q`` and return the results of the first page.

    A Chrome session is started, the search page is opened and two controls
    of the display-settings menu are clicked before the result titles are
    read. The browser is always shut down before returning, including when a
    lookup fails, so no Chrome process is left behind.

    NOTE: only the first result page is collected; paging through further
    pages is not implemented.

    Args:
        q: search term, interpolated as-is into the query string.

    Returns:
        list[dict]: one ``{'title': ..., 'url': ...}`` entry per result link.
    """
    url = 'https://www.ncbi.nlm.nih.gov/pubmed/?term=%s' % q
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        driver.implicitly_wait(15)
        driver.find_element_by_xpath(
            '//div[@id="result_action_bar"]/ul/li[3]/a').click()
        driver.find_element_by_xpath(
            '//div[@id="display_settings_menu_ps"]/fieldset/ul/li[last()]/input'
        ).click()
        of_utils.sleep(5)

        eles = driver.find_elements_by_xpath(
            '//div[@class="rslt"]/p[@class="title"]/a')
        return [
            {'title': e.text.strip(), 'url': e.get_attribute('href')}
            for e in eles
        ]
    finally:
        # Release the browser process and its WebDriver session.
        driver.quit()
def parse_product(self, driver):
    """Scrape title, CNY price, images and description from a product page.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``,
        ``images`` and ``detail`` set, plus ``price_cny`` when a price node
        exists.

    Raises:
        Exception: if the product title is not found.
    """
    of_utils.sleep(2)
    result = of_spider.empty_product.copy()

    # title (mandatory)
    title_node = of_utils.find_element_by_css_selector(
        driver, 'main .product-core-information .product-title')
    if not title_node:
        raise Exception('Title not found')
    result['title'] = title_node.text.strip()

    # code N/A

    # price_cny
    price_node = of_utils.find_element_by_css_selector(
        driver, 'main .product-core-information .product-price')
    if price_node:
        result['price_cny'] = of_utils.convert_price(price_node.text.strip())

    # images, de-duplicated while keeping first-seen order
    slides = of_utils.find_elements_by_css_selector(
        driver,
        'main .product-core-images .swiper-slide-duplicate .image-zoom-inner>img'
    )
    sources = []
    for slide in slides:
        sources.append(slide.get_attribute('src').strip())
    result['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    description = of_utils.find_element_by_css_selector(
        driver, 'main .product-description-content')
    result['detail'] = description.text.strip()
    return result
def parse_entry(self, driver):
    """Return product links from a listing page, across several layouts.

    Three static grid layouts are tried first. If none matches, an optional
    "load more" / "display all" control is clicked and the page is scrolled
    until the product list stops growing.

    Returns:
        list[str]: stripped ``href`` of the anchors that were found.
    """
    static_layouts = (
        # handbags
        'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a',
        # handbags, second variant
        'div.fs-products-grid > div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__wrapper > a',
        # makeup
        'div.fnb_col-wd6.fnb_product-img > a',
    )
    for selector in static_layouts:
        anchors = of_utils.find_elements_by_css_selector(driver, selector)
        if anchors:
            break

    if not anchors:
        # watches
        expander = of_utils.find_element_by_css_selector(
            driver, 'div.pd-action-btns > button[role=button]')
        if not expander:
            # rings
            expander = of_utils.find_element_by_css_selector(
                driver, 'div.display-all > a')
        if expander:
            driver.execute_script('arguments[0].click();', expander)
            of_utils.sleep(5)

        known = 0
        while True:
            anchors = of_utils.find_elements_by_css_selector(
                driver,
                'div.products > div.row > div > ul > li > div.product-item-wrapper > a')
            if len(anchors) <= known:
                break
            known = len(anchors)
            driver.execute_script(
                'window.scrollBy(0, document.body.scrollHeight);')
            of_utils.sleep(4)

    return [anchor.get_attribute('href').strip() for anchor in anchors]
def parse_product(self, driver):
    """Scrape title, code, CNY price, images and description via XPath.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``code``, ``price_cny`` and ``detail`` when
        their nodes exist.

    Raises:
        Exception: if the title node is not found.
    """
    of_utils.sleep(5)
    result = of_spider.empty_product.copy()

    # title (mandatory): the parent of the share icon
    title_node = of_utils.find_element_by_xpath(
        driver, '//i[@class="iconfont icon-ICON_share"]/..')
    if not title_node:
        raise Exception('Title not found')
    result['title'] = title_node.text.strip()

    # code
    code_node = of_utils.find_element_by_xpath(driver, '//div[@class="code"]')
    if code_node:
        result['code'] = code_node.text.strip()

    # price_cny
    price_node = of_utils.find_element_by_xpath(
        driver, '//div[@class="product-price"]/i')
    if price_node:
        result['price_cny'] = of_utils.convert_price(price_node.text.strip())

    # images: cut everything from '?x-oss-process' on, then de-duplicate
    # while keeping first-seen order
    thumbs = of_utils.find_elements_by_xpath(
        driver, '//ul[@class="small-img-list"]/li/img')
    sources = []
    for thumb in thumbs:
        raw = thumb.get_attribute('data-src').strip()
        sources.append(raw.split('?x-oss-process')[0])
    result['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    description = of_utils.find_element_by_xpath(
        driver, '//div[@class="desc"]//div[@class="content"]')
    if description:
        result['detail'] = description.text.strip()
    return result
def parse_product(self, driver):
    """Scrape title, reference code, images and description.

    The gallery reports a ``.gif`` URL in ``data-original`` until the real
    image is available, so the gallery is polled until the first URL no
    longer ends in ``.gif``. Polling is bounded, an empty gallery is
    tolerated, and the collected URLs are stored in ``product['images']``
    (previously they were computed but never saved).

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``,
        ``images`` and ``detail`` set, plus ``code`` when found.

    Raises:
        Exception: if the title element is not found.
    """
    driver.implicitly_wait(15)
    product = of_spider.empty_product.copy()

    # title (mandatory)
    element = of_utils.find_element_by_css_selector(
        driver, 'div.product-main-bloc > div > div > h1')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code
    element = of_utils.find_element_by_css_selector(
        driver, 'p.reference > span')
    if element:
        product['code'] = element.text.strip()

    # price_cny N/A

    # images: poll for at most max_attempts * 5 seconds
    max_attempts = 12
    images = []
    for _ in range(max_attempts):
        elements = of_utils.find_elements_by_css_selector(
            driver, 'div.product-media > img')
        images = [
            (element.get_attribute('data-original') or '').strip()
            for element in elements
        ]
        images = [image for image in images if image]
        # Stop when there is no gallery at all or the first image is real.
        if not images or not images[0].endswith('.gif'):
            break
        of_utils.sleep(5)
    # Never store URLs that still point at the .gif stand-in.
    product['images'] = ';'.join(
        image for image in images if not image.endswith('.gif'))

    # detail: short description followed by the collateral paragraph,
    # each one optional
    texts = []
    for selector in ('p.shortDescription', 'div.box-collateral > p'):
        element = of_utils.find_element_by_css_selector(driver, selector)
        if element:
            texts.append(element.get_attribute('innerHTML').strip())
    product['detail'] = '\n'.join(texts)
    return product
def parse_product(self, driver):
    """Scrape title, item code, images and description via XPath.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``code`` and ``detail`` when their nodes exist.

    Raises:
        Exception: if the product name heading is not found.
    """
    of_utils.sleep(2)
    result = of_spider.empty_product.copy()

    # title (mandatory); line breaks are flattened to spaces
    heading = of_utils.find_element_by_xpath(
        driver, '//h1[contains(@class,"c-product-name-pdp")]')
    if not heading:
        raise Exception('Title not found')
    result['title'] = heading.text.strip().replace('\n', ' ')

    # code: the label prefix is removed from the text
    code_node = of_utils.find_element_by_xpath(
        driver, '//span[contains(@class,"o-utility")]')
    if code_node:
        raw_code = code_node.text.strip()
        result['code'] = raw_code.replace('商品编号 :', '').strip()

    # price_cny N/A

    # images, de-duplicated while keeping first-seen order
    pictures = of_utils.find_elements_by_xpath(
        driver, '//div[contains(@class,"product-image-first")]/img')
    sources = []
    for picture in pictures:
        sources.append(picture.get_attribute('src').strip())
    result['images'] = ';'.join(dict.fromkeys(sources))

    # detail
    description = of_utils.find_element_by_xpath(
        driver, '//p[@class="text--center c-collection-desc"]')
    if description:
        result['detail'] = description.text.strip()
    return result
def parse_product(self, driver):
    """Scrape title, CNY price, images and short description.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``price_cny`` and ``detail`` when found.

    Raises:
        Exception: if the product name is not found.
    """
    of_utils.sleep(4)
    result = of_spider.empty_product.copy()

    # title (mandatory)
    name_node = of_utils.find_element_by_css_selector(
        driver, '.product-information .product-name')
    if not name_node:
        raise Exception('Title not found')
    result['title'] = name_node.text.strip()

    # code N/A

    # price_cny
    price_node = of_utils.find_element_by_css_selector(
        driver, '.product-information .product-price .p-price')
    if price_node:
        result['price_cny'] = of_utils.convert_price(price_node.text.strip())

    # images
    pictures = of_utils.find_elements_by_css_selector(
        driver, '.product-information .product-details-image-gallery img')
    sources = []
    for picture in pictures:
        sources.append(picture.get_attribute('src').strip())
    result['images'] = ';'.join(sources)

    # detail
    summary = of_utils.find_element_by_css_selector(
        driver, '.product-information .product-short-description')
    if summary:
        result['detail'] = summary.text.strip()
    return result
def parse_entry(self, driver):
    """Expand a "list more" listing completely and return product URLs.

    The "list more" button is clicked until the current-page counter equals
    the total-page counter. Expansion also stops when the button or either
    counter is missing, instead of failing with an ``AttributeError`` on
    ``None``. Product URLs are then rebuilt from each thumbnail's
    ``onclick="window.open('...')"`` handler.

    Returns:
        list[str]: absolute product URLs under ``http://www.etude.cn``.
    """
    of_utils.sleep(5)
    while True:
        btn = of_utils.find_element_by_xpath(
            driver, '//div[contains(@class,"etude-btn-listMore")]')
        page_all = of_utils.find_element_by_xpath(
            driver, '//span[contains(@class,"goods_page_all")]')
        page_now = of_utils.find_element_by_xpath(
            driver, '//span[contains(@class,"goods_page_now")]')
        if not (btn and page_all and page_now):
            break
        if page_all.text.strip() == page_now.text.strip():
            break
        driver.execute_script('arguments[0].click();', btn)
        of_utils.sleep(5)

    urls = []
    elements = of_utils.find_elements_by_xpath(
        driver, '//div[@class="product_cell_thumbBox"]')
    for e in elements:
        onclick = e.get_attribute('onclick')
        if not onclick:
            # Thumbnail without a click handler: no URL to extract.
            continue
        path = onclick.strip().replace("window.open('", '').replace("')", '')
        urls.append('http://www.etude.cn' + path)
    return urls
def parse_entry(self, driver):
    """Return product links, clicking the "view all" link first if present.

    Returns:
        list[str]: stripped ``href`` of each
        ``#search-result-items li a.thumb-link``.
    """
    view_all = of_utils.find_element_by_css_selector(
        driver, '.view-all li a')
    if view_all:
        driver.execute_script('arguments[0].click();', view_all)
        of_utils.sleep(4)

    anchors = of_utils.find_elements_by_css_selector(
        driver, "#search-result-items li a.thumb-link")
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links
def parse_entry(self, driver):
    """Return product links after scrolling down and expanding pagination.

    The page is scrolled by one document height. The ``pagination__all``
    link is then clicked if present, followed by a five second pause.

    Returns:
        list[str]: stripped ``href`` of each product thumb link.
    """
    driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')

    show_all = of_utils.find_element_by_xpath(
        driver, '//a[@class="pagination__all"]')
    if show_all:
        driver.execute_script('arguments[0].click();', show_all)
        of_utils.sleep(5)

    anchors = of_utils.find_elements_by_xpath(
        driver, '//div[@class="product-image"]//a[@class="thumb-link"]')
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links
def parse_entry(self, driver):
    """Scroll the listing three times, then return the product links.

    Each scroll moves down by one document height and is followed by a four
    second pause.

    Returns:
        list[str]: stripped ``href`` of each product tile anchor.
    """
    scroll_passes = 3
    for _ in range(scroll_passes):
        driver.execute_script(
            'window.scrollBy(0, document.body.scrollHeight);')
        of_utils.sleep(4)

    tiles = of_utils.find_elements_by_css_selector(
        driver,
        'div#search-result-items > div.grid-tile > div.product-tile > a')
    links = []
    for tile in tiles:
        links.append(tile.get_attribute('href').strip())
    return links
def parse_product(self, driver):
    """Switch the page to the zh_TW locale, then scrape the product.

    The language selector and the zh_TW entry are clicked only when they are
    found, and the long description is optional. The previous dead loop that
    built an image list from ``element.text`` and was immediately
    overwritten has been removed; images are the thumbnails' ``src`` values,
    as before.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title`` and
        ``images`` set, plus ``code``, ``price_cny`` and ``detail`` when
        their nodes exist.

    Raises:
        Exception: if ``span.b-product_name`` is not found.
    """
    # Switch language
    btn = of_utils.find_element_by_css_selector(
        driver, 'span.b-country_language_selector-title')
    if btn:
        driver.execute_script('arguments[0].click();', btn)
        of_utils.sleep(3)
    link = of_utils.find_element_by_css_selector(
        driver, 'li.b-language_selector-language_item[data-locale=zh_TW] > a')
    if link:
        driver.execute_script('arguments[0].click();', link)
        of_utils.sleep(5)

    product = of_spider.empty_product.copy()

    # title (mandatory)
    element = of_utils.find_element_by_css_selector(
        driver, 'span.b-product_name')
    if not element:
        raise Exception('Title not found')
    product['title'] = element.text.strip()

    # code: keep whatever follows the last ':'
    element = of_utils.find_element_by_css_selector(
        driver, 'div.b-product_master_id')
    if element:
        product['code'] = element.text.split(':')[-1].strip()

    # price_cny
    element = of_utils.find_element_by_css_selector(
        driver,
        'h2.b-product_container-price > div.b-product_price > h4.b-product_price-standard'
    )
    if element:
        # Drop the leading currency symbol and remove every '.' in the text.
        price_text = element.text.strip()[1:].strip().replace('.', '')
        product['price_cny'] = int(float(price_text))

    # images
    elements = of_utils.find_elements_by_css_selector(
        driver, 'ul.js-thumbnails > li > img')
    images = [element.get_attribute('src').strip() for element in elements]
    product['images'] = ';'.join(images)

    # detail: drop <i> tags, turn <br> into newlines, discard blank lines
    element = of_utils.find_element_by_css_selector(
        driver, 'div.b-product_long_description')
    if element:
        html = element.get_attribute('innerHTML').strip()
        html = html.replace('<i>', '').replace('</i>', '')
        html = html.replace('<br>', '\n')
        lines = [line.strip() for line in html.split('\n')]
        product['detail'] = '\n'.join(line for line in lines if line != '')
    return product
def parse_entry(self, driver):
    """Click every "show all" control, then return the product links.

    Returns:
        list[str]: stripped ``href`` of each ``a.product-image``.
    """
    show_all_controls = of_utils.find_elements_by_css_selector(
        driver, '.page-show-all')
    # A falsy lookup result is treated as "no controls to click".
    for control in show_all_controls or ():
        driver.execute_script('arguments[0].click();', control)
        of_utils.sleep(5)

    anchors = of_utils.find_elements_by_css_selector(
        driver, 'a.product-image')
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links
def parse_entry(self, driver):
    """Expand every shelf via its "view all" link and return product links.

    Returns:
        list[str]: stripped ``href`` of each ``div.product_container > a``.
    """
    view_all_links = of_utils.find_elements_by_css_selector(
        driver, 'a.shelf_view-all')
    for view_all in view_all_links:
        driver.execute_script('arguments[0].click();', view_all)
        of_utils.sleep(4)

    anchors = of_utils.find_elements_by_css_selector(
        driver, 'div.product_container > a')
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links
def parse_product(self, driver):
    """Scrape a product page that exists in two different layouts.

    For title, price, images and description the first layout's selector is
    tried, then the second layout's selector as a fallback.

    Returns:
        dict: a copy of ``of_spider.empty_product`` with ``title``,
        ``images`` and ``detail`` set, plus ``price_cny`` when a non-empty
        price text is found.

    Raises:
        Exception: if neither title selector matches.
    """
    of_utils.sleep(12)  # Sleep for loading
    result = of_spider.empty_product.copy()

    # title (mandatory); fall back to innerHTML when the visible text is empty
    title_node = of_utils.find_element_by_css_selector(
        driver, 'div.productInfo > h1.productName > div > span.modelName')
    if not title_node:
        title_node = of_utils.find_element_by_css_selector(
            driver, 'div.product-tit > h1')
    if not title_node:
        raise Exception('Title not found')
    title = title_node.text.strip()
    if not title:
        title = title_node.get_attribute('innerHTML')
    result['title'] = title

    # code N/A

    # price_cny
    price_node = of_utils.find_element_by_css_selector(
        driver, 'div.productInfo > div#itemPrice')
    if not price_node:
        price_node = of_utils.find_element_by_css_selector(
            driver, 'div.product-handle > div.product-price')
    if price_node:
        raw_price = price_node.text.strip()
        if raw_price:
            # Drop the leading currency symbol and the comma separators.
            digits = raw_price[1:].strip().replace(',', '')
            result['price_cny'] = int(float(digits))

    # images: the fallback layout's URLs have '110X110' swapped for '500X500'
    thumbs = of_utils.find_elements_by_css_selector(
        driver,
        'div.itempage-images-content > ul.alternativeImages > li > img')
    if thumbs:
        sources = [thumb.get_attribute('src').strip() for thumb in thumbs]
    else:
        thumbs = of_utils.find_elements_by_css_selector(
            driver, 'div.thumbnails-box > div > ul.swiper-wrapper > li > img')
        sources = [
            thumb.get_attribute('src').strip().replace('110X110', '500X500')
            for thumb in thumbs
        ]
    result['images'] = ';'.join(sources)

    # detail: the fallback layout keeps only the first line of its text
    description = of_utils.find_element_by_css_selector(
        driver, 'div.description > div.descriptionContent')
    if description:
        detail = description.text.strip()
    else:
        description = of_utils.find_element_by_css_selector(
            driver, 'div.product-description > p')
        detail = description.text.split('\n')[0].strip()
    result['detail'] = detail
    return result
def parse_entry(self, driver):
    """Return product links, expanding the card list first if possible.

    Returns:
        list[str]: stripped ``href`` attribute of each
        ``.c-product-cards-photo-img`` element inside a card list item.
    """
    show_all = of_utils.find_element_by_css_selector(
        driver, '.c-product-cards-list-all')
    if show_all:
        driver.execute_script('arguments[0].click();', show_all)
        of_utils.sleep(4)

    cards = of_utils.find_elements_by_css_selector(
        driver, '.c-product-cards-list-item .c-product-cards-photo-img')
    links = []
    for card in cards:
        links.append(card.get_attribute('href').strip())
    return links
def parse_entry(self, driver):
    """Click "load more" until it disappears, then return product links.

    Returns:
        list[str]: stripped ``href`` of each ``div.grid-cell > a.thumb-link``.
    """
    while True:
        load_more = of_utils.find_element_by_css_selector(
            driver, 'a.load-more')
        if not load_more:
            break
        driver.execute_script('arguments[0].click();', load_more)
        of_utils.sleep(3)

    anchors = of_utils.find_elements_by_css_selector(
        driver, 'div.grid-cell > a.thumb-link')
    links = []
    for anchor in anchors:
        links.append(anchor.get_attribute('href').strip())
    return links