示例#1
0
 def parse_entry(self, driver):
     """Collect product links from a listing page.

     Clicks the pager link when one is found, calls ``of_utils.sleep(2)``
     either way, then gathers the ``href`` of every product anchor.

     Returns:
         list of stripped ``href`` strings, in the order the anchors
         were returned by the lookup.
     """
     pager_link = of_utils.find_element_by_css_selector(
         driver, 'div.pagination > div.pager > a')
     if pager_link:
         driver.execute_script('arguments[0].click();', pager_link)
     # The pause happens whether or not a pager link was clicked.
     of_utils.sleep(2)
     anchors = of_utils.find_elements_by_css_selector(
         driver, 'article.js-ProductItem  > a')
     urls = []
     for anchor in anchors:
         urls.append(anchor.get_attribute('href').strip())
     return urls
示例#2
0
    def parse_entry(self, driver):
        """Scroll a listing until it stops growing and return its links.

        On every pass the candidate selectors are tried in order and the
        first non-empty result is used. While the number of anchors keeps
        increasing the page is scrolled down and the method pauses before
        looking again.

        Returns:
            list of stripped ``href`` strings from the last lookup.
        """
        driver.implicitly_wait(15)
        # Alternative page layouts, checked in this order.
        selectors = (
            'div.productItemContainer > a',
            'li.productItemContainer > a',
            'li.productItem > a',
            'li.lookItem > a',
            'ul.lv-list-container a',
        )
        seen = 0
        while True:
            for selector in selectors:
                anchors = of_utils.find_elements_by_css_selector(
                    driver, selector)
                if anchors:
                    break
            if len(anchors) <= seen:
                # Nothing new appeared since the previous pass.
                break
            seen = len(anchors)
            driver.execute_script(
                'window.scrollBy(0, document.body.scrollHeight);')
            of_utils.sleep(4)
        return [anchor.get_attribute('href').strip() for anchor in anchors]
示例#3
0
 def parse_product(self, driver):
     """Scrape title, model code, images and description of a product page.

     Only the title is mandatory. The model code, the accordion button and
     individual image attributes are skipped when missing instead of
     raising on ``None`` (the lookup helpers are assumed to return a falsy
     value when nothing matches, as the title check already relies on).

     Returns:
         A copy of ``of_spider.empty_product`` with 'title', 'images' and
         'detail' set, plus 'code' when the model-name element exists.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'div.breadcrumbLeaf > p.attributesUpdater.Title > span.value')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # price_cny N/A
     # images
     elements = of_utils.find_elements_by_css_selector(driver, 'div.item-alternativeImages-shots > ul > li > img')
     images = []
     for element in elements:
         origin = element.get_attribute('data-origin')
         if not origin:
             # No usable source on this <img>; nothing to record.
             continue
         # Swap the '_10_' marker for '_20_' in the image URL
         # (presumably selects a different rendition -- confirm).
         images.append(origin.strip().replace('_10_', '_20_'))
     product['images'] = ';'.join(images)
     # code
     element = of_utils.find_element_by_css_selector(driver, 'span.modelName')
     if element:
         product['code'] = element.get_attribute('innerHTML').strip()
     # detail: click the accordion control (when present) before reading
     # the description items.
     btn = of_utils.find_element_by_css_selector(driver, 'ul.itemDetails-info-accordion > li > h2 > div.plusIcon')
     if btn:
         driver.execute_script('arguments[0].click();', btn)
     elements = of_utils.find_elements_by_css_selector(driver, 'div.itemdescription > ul > li')
     texts = [element.get_attribute('innerHTML').strip() for element in elements]
     product['detail'] = '\n'.join(texts)
     return product
示例#4
0
 def parse_product(self, driver):
     """Build a product record (title, euro price, images) from a page.

     Returns:
         A copy of ``of_spider.empty_product`` with 'title' and 'images'
         set, and 'price_euro_de' when the price element is found.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(2)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_css_selector(
         driver, ".product-name")
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code N/A
     # price (stored under 'price_euro_de')
     price_node = of_utils.find_element_by_css_selector(
         driver, 'label.regular-price>span')
     if price_node:
         amount = price_node.text.strip().replace('€', '').replace(',', '')
         product['price_euro_de'] = int(float(amount))

     # images: duplicates dropped by using the URLs as dict keys
     image_nodes = of_utils.find_elements_by_css_selector(
         driver, '.product-image-gallery > a > img')
     unique_sources = {}
     for node in image_nodes:
         unique_sources[node.get_attribute('src').strip()] = None
     product['images'] = ';'.join(unique_sources)

     # detail N/A
     return product
示例#5
0
 def parse_product(self, driver):
     """Scrape title, code, CNY price, images and description of a product.

     Only the title is mandatory. Every other field is filled when its
     element is found; a missing description, an empty price text or an
     image without ``src`` is skipped instead of raising.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'h1#curr_skuName')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code: the text after the last ':' of the element
     element = of_utils.find_element_by_css_selector(driver, 'p.pronumber')
     if element:
         product['code'] = element.text.split(':')[-1].strip()
     # price_cny
     element = of_utils.find_element_by_css_selector(driver, 'span.skuPrice')
     if not element:
         element = of_utils.find_element_by_css_selector(driver, 'span.price#skuPrice')
     if element:
         # Drop the first three characters (the leading 'RMB').
         price_text = element.text.strip()[3:].strip().replace(',', '')
         if price_text:
             product['price_cny'] = int(float(price_text))
     # images
     images = []
     elements = of_utils.find_elements_by_css_selector(driver, 'ul#fullscreen_swatchpro_small > li > img')
     for element in elements:
         src = element.get_attribute('src')
         if src:
             # Keep only the part before the query string.
             images.append(src.split('?')[0].strip())
     product['images'] = ';'.join(images)
     # detail
     element = of_utils.find_element_by_css_selector(driver, 'div.description')
     if element:
         product['detail'] = element.text.strip()
     return product
示例#6
0
 def parse_product(self, driver):
     """Build a product record from a product detail page.

     Sets 'title', 'images' and 'detail', plus 'code' and 'price_cny' when
     their elements are found. The price element is only looked up after
     the price-reveal control has been clicked.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     product = of_spider.empty_product.copy()

     # title (mandatory)
     name_node = of_utils.find_element_by_css_selector(
         driver, 'span.product.attribute.name')
     if not name_node:
         raise Exception('Title not found')
     product['title'] = name_node.text.strip()

     # code
     sku_node = of_utils.find_element_by_css_selector(
         driver, 'span.value[itemprop=sku]')
     if sku_node:
         product['code'] = sku_node.text.strip()

     # price_cny
     reveal = of_utils.find_element_by_css_selector(
         driver, 'span.product-price-reveal__action__show')
     if reveal:
         driver.execute_script('arguments[0].click();', reveal)
         of_utils.sleep(2)
         price_node = of_utils.find_element_by_css_selector(
             driver, 'span.price')
         if price_node:
             # Drop the first character (the leading currency sign).
             digits = price_node.text.strip()[1:].strip().replace(',', '')
             product['price_cny'] = int(float(digits))

     # images
     image_nodes = of_utils.find_elements_by_css_selector(
         driver, 'div.fotorama__stage__shaft > div > img')
     sources = []
     for node in image_nodes:
         sources.append(node.get_attribute('src').strip())
     product['images'] = ';'.join(sources)

     # detail: one line per paragraph's inner HTML
     paragraphs = of_utils.find_elements_by_css_selector(
         driver, 'div.product-info-details-content > div.value > p')
     fragments = []
     for paragraph in paragraphs:
         fragments.append(paragraph.get_attribute('innerHTML').strip())
     product['detail'] = '\n'.join(fragments)
     return product
示例#7
0
 def parse_product(self, driver):
     """Scrape title, SKU, gallery images and description of a product.

     Only the title is mandatory. A missing description element or a
     gallery frame without an ``href`` attribute is skipped instead of
     raising on ``None``.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(8)  # wait for the images to be rendered
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'h1.page-title')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_css_selector(
         driver, 'div.value[itemprop=sku]')
     if element:
         product['code'] = element.text.strip()
     # price_cny N/A
     # images: the image URL is read from each frame's 'href' attribute
     elements = of_utils.find_elements_by_css_selector(
         driver, 'div[data-gallery-role=stage-shaft] > div')
     images = []
     for element in elements:
         href = element.get_attribute('href')
         if href:
             images.append(href.strip())
     product['images'] = ';'.join(images)
     # detail
     element = of_utils.find_element_by_css_selector(
         driver, 'div.description > div.value')
     if element:
         product['detail'] = element.text.strip()
     return product
示例#8
0
    def parse_product(self, driver):
        """Build a product record (title, GBP price, images) from a page.

        If a '.glCancelBtn' element is present it is clicked first and the
        method pauses before scraping.

        Returns:
            A copy of ``of_spider.empty_product`` with 'title' and 'images'
            set, and 'price_gbp' (a string) when the price element exists.

        Raises:
            Exception: 'Title not found' when the title element is missing.
        """
        cancel_btn = of_utils.find_element_by_css_selector(
            driver, '.glCancelBtn')
        if cancel_btn:
            driver.execute_script('arguments[0].click();', cancel_btn)
            of_utils.sleep(2)

        product = of_spider.empty_product.copy()

        # title (mandatory)
        title_node = of_utils.find_element_by_css_selector(
            driver, '.product-title')
        if not title_node:
            raise Exception('Title not found')
        product['title'] = title_node.text.strip()

        # code N/A
        # price_gbp: currency sign, thousands commas and '.00' removed
        price_node = of_utils.find_element_by_css_selector(
            driver, '.dark-happy-place--grey-med>span')
        if price_node:
            raw_price = price_node.text
            raw_price = raw_price.replace('£', '')
            raw_price = raw_price.replace(',', '')
            raw_price = raw_price.replace('.00', '')
            product['price_gbp'] = raw_price.strip()

        # images
        image_nodes = of_utils.find_elements_by_css_selector(
            driver, '.product__image-wrapper ul li img')
        sources = []
        for node in image_nodes:
            sources.append(node.get_attribute('data-src').strip())
        product['images'] = ';'.join(sources)

        # detail N/A
        return product
示例#9
0
    def parse_entry(self, driver):
        """Walk a paginated listing and return every product link.

        On each page the product anchors that carry an ``href`` are
        collected. Paging continues while the '#currentPage' value differs
        from the '#totalPages' value and a '.next-page' control exists.
        If either page-counter element is missing the walk stops instead
        of raising on ``None``.

        Returns:
            list of stripped ``href`` strings across all visited pages.
        """
        urls = []
        while True:
            elements = of_utils.find_elements_by_css_selector(
                driver, '.list-right-content .list-item .img-box a')
            for ele in elements or []:
                # Read the attribute once; anchors without href are skipped.
                href = ele.get_attribute('href')
                if href is not None:
                    urls.append(href.strip())

            total_field = of_utils.find_element_by_css_selector(
                driver, '#totalPages')
            current_field = of_utils.find_element_by_css_selector(
                driver, '#currentPage')
            if not total_field or not current_field:
                # Without both counters there is no way to tell whether
                # more pages exist, so stop here.
                break
            total_page = total_field.get_attribute('value')
            cur_page = current_field.get_attribute('value')
            if cur_page == total_page:
                break

            btn = of_utils.find_element_by_css_selector(
                driver, '.next-page')
            if not btn:
                break
            driver.execute_script('arguments[0].click();', btn)
            of_utils.sleep(4)
        return urls
示例#10
0
 def parse_product(self, driver):
     """Scrape title, reference code, CNY price, images and description.

     Only the title is mandatory. A missing description, an empty price
     text or an image without ``data-zoom-src`` is skipped instead of
     raising.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(12)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'div.product-item__detail-name > div.product-name')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_css_selector(driver, 'p.productreference > span.productreference-value')
     if element:
         product['code'] = element.text.strip()
     # price_cny: two alternative locations are tried in order
     element = of_utils.find_element_by_css_selector(driver, 'div.product-item__detail-name > div.product-price > span.price-sales')
     if not element:
         element = of_utils.find_element_by_css_selector(driver, 'div.product-item__detail-price > div.product-price > span.price-sales')
     if element:
         # Drop the first character (the leading ¥).
         price_text = element.text.strip()[1:].strip().replace(',', '')
         if price_text:
             product['price_cny'] = int(float(price_text))
     # images: 'data-zoom-src' is appended to the site origin
     elements = of_utils.find_elements_by_css_selector(driver, 'div.product-item__gallery-item-image > a > img')
     images = []
     for element in elements:
         zoom_src = element.get_attribute('data-zoom-src')
         if zoom_src:
             images.append('https://www.acnestudios.com' + zoom_src.strip())
     product['images'] = ';'.join(images)
     # detail
     element = of_utils.find_element_by_css_selector(driver, 'div.product-item__core-information > div > div.product-item__detail-info-description')
     if element:
         product['detail'] = element.text.strip()
     return product
示例#11
0
    def parse_product(self, driver):
        """Build a product record (title, code, CNY price, images).

        Unlike a strict scraper, nothing here is mandatory: a missing
        title or code becomes '' and a missing price becomes 0.

        Returns:
            A copy of ``of_spider.empty_product`` with 'title', 'code',
            'price_cny' and 'images' set.
        """
        of_utils.sleep(2)
        product = of_spider.empty_product.copy()

        # title
        title_node = of_utils.find_element_by_css_selector(
            driver, 'div.product-name>h1')
        if title_node:
            product['title'] = title_node.text.strip()
        else:
            product['title'] = ''

        # code
        sku_node = of_utils.find_element_by_css_selector(
            driver, 'div[itemprop=sku]')
        if sku_node:
            product['code'] = sku_node.text.strip()
        else:
            product['code'] = ''

        # price_cny, taken from the meta tag's 'content' attribute
        price_meta = of_utils.find_element_by_css_selector(
            driver, 'meta[itemprop=price]')
        if price_meta:
            product['price_cny'] = of_utils.convert_price(
                price_meta.get_attribute('content').strip())
        else:
            product['price_cny'] = 0

        # images: thumbnails first, stage frames as fallback
        thumbs = of_utils.find_elements_by_css_selector(
            driver, '.fotorama__thumb>img')
        if not thumbs:
            thumbs = of_utils.find_elements_by_css_selector(
                driver, '.fotorama__stage__frame>img')
        sources = []
        for thumb in thumbs:
            src = thumb.get_attribute('src').strip().replace('cache', '')
            # Remove every path segment that is exactly 32 characters long
            # (presumably a cache hash directory -- confirm). The segments
            # are taken from the URL as it was before any removal.
            for segment in src.split('/'):
                if len(segment) == 32:
                    src = src.replace(segment, '')
            sources.append(src)
        product['images'] = ';'.join(sources)

        return product
示例#12
0
    def parse_entry(self, driver):
        """Page down through a listing until it stops growing.

        Clicks the visible load-more button once when present. Then, as
        long as the number of product links keeps increasing, moves to the
        last link, sends five PAGE_DOWN keys and pauses before recounting.

        Returns:
            list of stripped ``href`` strings from the last lookup.
        """
        driver.implicitly_wait(15)
        load_more = of_utils.find_element_by_xpath(
            driver, '//div[@class="loadMore isVisible"]/button')
        if load_more:
            driver.execute_script('arguments[0].click();', load_more)
            of_utils.sleep(4)

        link_xpath = '//a[@class="productQB__wrapperImage js-product-qb-link"]'
        known = 0
        while True:
            links = of_utils.find_elements_by_xpath(driver, link_xpath)
            if len(links) <= known:
                # No new links since the previous pass.
                break
            known = len(links)
            chain = ActionChains(driver).move_to_element(links[-1])
            for _ in range(5):
                chain.send_keys(Keys.PAGE_DOWN)
            chain.perform()
            of_utils.sleep(4)
        return [link.get_attribute('href').strip() for link in links]
示例#13
0
    def parse_product(self, driver):
        """Open one product popup on a listing page and scrape it.

        The text after the last '?' of the current URL is parsed as an
        integer; it selects which listing anchor to click and which
        '.popup_product_<n>' container to read.

        Returns:
            A copy of ``of_spider.empty_product`` with 'title' and 'images'
            set, and 'price_hkd' (a string) when the price element exists.

        Raises:
            Exception: 'Title not found' when the title element is missing.
        """
        tiles = of_utils.find_elements_by_css_selector(
            driver, '.product_list .product_1 a')
        index = int(driver.current_url.split('?')[-1])
        driver.execute_script('arguments[0].click();', tiles[index])
        of_utils.sleep(2)

        product = of_spider.empty_product.copy()

        # title (mandatory)
        title_node = of_utils.find_element_by_css_selector(
            driver,
            '.popup_product_%s .product_detail_content .jspPane h2' % index)
        if not title_node:
            raise Exception('Title not found')
        product['title'] = title_node.text.strip()

        # code N/A
        # price_hkd: the part before the first '/', without '$' and commas
        price_node = of_utils.find_element_by_css_selector(
            driver, '.popup_product_%s .product_photo h3' % index)
        if price_node:
            first_part = price_node.text.strip().split('/')[0].strip()
            product['price_hkd'] = first_part.replace('$', '').replace(',', '')

        # images
        image_nodes = of_utils.find_elements_by_css_selector(
            driver, '.popup_product_%s .product_photo img' % index)
        sources = []
        for node in image_nodes:
            sources.append(node.get_attribute('src').strip())
        product['images'] = ';'.join(sources)

        # detail N/A
        return product
示例#14
0
def getList(q):
    """Search PubMed for ``q`` and return the titles/links of the results.

    Starts a Chrome driver, opens the search URL, clicks two controls in
    the display-settings menu (the last option of the list), pauses, and
    then reads the result links of the page that is shown. The browser is
    always shut down before returning, including on errors.

    Args:
        q: search term; it is inserted into the URL as-is, so the caller
            is responsible for any URL encoding.

    Returns:
        list of dicts with keys 'title' (stripped link text) and 'url'
        (the link's ``href``).
    """
    url = 'https://www.ncbi.nlm.nih.gov/pubmed/?term=%s' % q
    options = webdriver.ChromeOptions()
    options.add_argument('--ignore-certificate-errors')
    driver = webdriver.Chrome(chrome_options=options)
    try:
        driver.maximize_window()
        driver.get(url)
        driver.implicitly_wait(15)

        driver.find_element_by_xpath('//div[@id="result_action_bar"]/ul/li[3]/a').click()
        driver.find_element_by_xpath('//div[@id="display_settings_menu_ps"]/fieldset/ul/li[last()]/input').click()
        of_utils.sleep(5)

        # Only the currently displayed page is read; following the "next"
        # link was disabled in the original implementation.
        links = driver.find_elements_by_xpath('//div[@class="rslt"]/p[@class="title"]/a')
        return [
            {'title': link.text.strip(), 'url': link.get_attribute('href')}
            for link in links
        ]
    finally:
        # The driver is local to this function, so nobody else can close
        # it; quitting here avoids leaking a browser process per call.
        driver.quit()
示例#15
0
 def parse_product(self, driver):
     """Scrape title, CNY price, images and description of a product page.

     Only the title is mandatory. A missing description element or an
     image without ``src`` is skipped instead of raising on ``None``.
     Image URLs are de-duplicated.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(2)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'main .product-core-information .product-title')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code N/A
     # price_cny
     element = of_utils.find_element_by_css_selector(
         driver, 'main .product-core-information .product-price')
     if element:
         product['price_cny'] = of_utils.convert_price(element.text.strip())
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver,
         'main .product-core-images .swiper-slide-duplicate .image-zoom-inner>img'
     )
     images = []
     for element in elements:
         src = element.get_attribute('src')
         if src:
             images.append(src.strip())
     # Using the URLs as dict keys drops duplicates.
     product['images'] = ';'.join(dict.fromkeys(images))
     # detail
     element = of_utils.find_element_by_css_selector(
         driver, 'main .product-description-content')
     if element:
         product['detail'] = element.text.strip()
     return product
示例#16
0
 def parse_entry(self, driver):
     """Return product links from one of several listing layouts.

     Three static layouts are tried in order and the first non-empty match
     is returned directly. If none matches, an expand control is clicked
     when found and the page is scrolled until the number of product
     anchors stops growing.

     Returns:
         list of stripped ``href`` strings.
     """
     static_layouts = (
         # handbags
         'div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__illu > a',
         # handbags, second variant
         'div.fs-products-grid > div.fs-products-grid__product.fs-gridelement > div.fs-products-grid__product__wrapper > a',
         # makeup
         'div.fnb_col-wd6.fnb_product-img > a',
     )
     for selector in static_layouts:
         anchors = of_utils.find_elements_by_css_selector(driver, selector)
         if anchors:
             return [anchor.get_attribute('href').strip() for anchor in anchors]

     # watches
     expand = of_utils.find_element_by_css_selector(driver, 'div.pd-action-btns > button[role=button]')
     if not expand:
         # rings
         expand = of_utils.find_element_by_css_selector(driver, 'div.display-all > a')
     if expand:
         driver.execute_script('arguments[0].click();', expand)
     of_utils.sleep(5)

     seen = 0
     while True:
         anchors = of_utils.find_elements_by_css_selector(driver, 'div.products > div.row > div > ul > li > div.product-item-wrapper > a')
         if len(anchors) <= seen:
             # The list did not grow after the last scroll.
             break
         seen = len(anchors)
         driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')
         of_utils.sleep(4)
     return [anchor.get_attribute('href').strip() for anchor in anchors]
示例#17
0
 def parse_product(self, driver):
     """Build a product record using XPath lookups.

     Sets 'title' and 'images', plus 'code', 'price_cny' and 'detail' when
     their elements are found.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()

     # title: text of the parent of the share icon
     title_node = of_utils.find_element_by_xpath(
         driver, '//i[@class="iconfont icon-ICON_share"]/..')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code
     code_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="code"]')
     if code_node:
         product['code'] = code_node.text.strip()

     # price_cny
     price_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="product-price"]/i')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images: everything from '?x-oss-process' on is cut off, then
     # duplicates are dropped by using the URLs as dict keys
     image_nodes = of_utils.find_elements_by_xpath(
         driver, '//ul[@class="small-img-list"]/li/img')
     unique_sources = {}
     for node in image_nodes:
         data_src = node.get_attribute('data-src').strip()
         unique_sources[data_src.split('?x-oss-process')[0]] = None
     product['images'] = ';'.join(unique_sources)

     # detail
     detail_node = of_utils.find_element_by_xpath(
         driver, '//div[@class="desc"]//div[@class="content"]')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
示例#18
0
 def parse_product(self, driver):
     """Scrape title, reference code, images and description of a product.

     The gallery is polled until its first image no longer ends in
     '.gif' (presumably a lazy-load placeholder -- confirm). Polling is
     bounded, an empty gallery ends it immediately, and the collected URLs
     are stored in ``product['images']``. Description parts whose element
     is missing are skipped.

     Returns:
         A copy of ``of_spider.empty_product`` with 'title', 'images' and
         'detail' set, plus 'code' when the reference element exists.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     driver.implicitly_wait(15)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(driver, 'div.product-main-bloc > div > div > h1')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code
     element = of_utils.find_element_by_css_selector(driver, 'p.reference > span')
     if element:
         product['code'] = element.text.strip()
     # price_cny N/A
     # images
     max_polls = 12  # upper bound so a page that never loads cannot hang the spider
     images = []
     for _ in range(max_polls):
         elements = of_utils.find_elements_by_css_selector(driver, 'div.product-media > img')
         images = []
         for element in elements or []:
             original = element.get_attribute('data-original')
             if original:
                 images.append(original.strip())
         if not images or not images[0].endswith('.gif'):
             break
         of_utils.sleep(5)
     # Best effort: whatever the last poll returned is stored, even if the
     # first image still ends in '.gif' after the final attempt.
     product['images'] = ';'.join(images)
     # detail: short description followed by the collateral paragraph
     texts = []
     for selector in ('p.shortDescription', 'div.box-collateral > p'):
         element = of_utils.find_element_by_css_selector(driver, selector)
         if element:
             texts.append(element.get_attribute('innerHTML').strip())
     product['detail'] = '\n'.join(texts)
     return product
示例#19
0
 def parse_product(self, driver):
     """Build a product record (title, code, images, detail) via XPath.

     Sets 'title' and 'images', plus 'code' and 'detail' when their
     elements are found.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(2)
     product = of_spider.empty_product.copy()

     # title: line breaks inside the heading become spaces
     title_node = of_utils.find_element_by_xpath(
         driver, '//h1[contains(@class,"c-product-name-pdp")]')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip().replace('\n', ' ')

     # code: the label prefix is removed from the text
     code_node = of_utils.find_element_by_xpath(
         driver, '//span[contains(@class,"o-utility")]')
     if code_node:
         code_text = code_node.text.strip()
         product['code'] = code_text.replace('商品编号 :', '').strip()

     # price_cny N/A
     # images: duplicates dropped by using the URLs as dict keys
     image_nodes = of_utils.find_elements_by_xpath(
         driver, '//div[contains(@class,"product-image-first")]/img')
     unique_sources = {}
     for node in image_nodes:
         unique_sources[node.get_attribute('src').strip()] = None
     product['images'] = ';'.join(unique_sources)

     # detail
     detail_node = of_utils.find_element_by_xpath(
         driver, '//p[@class="text--center c-collection-desc"]')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
示例#20
0
 def parse_product(self, driver):
     """Build a product record from the '.product-information' section.

     Sets 'title' and 'images', plus 'price_cny' and 'detail' when their
     elements are found.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     of_utils.sleep(4)
     product = of_spider.empty_product.copy()

     # title (mandatory)
     title_node = of_utils.find_element_by_css_selector(
         driver, '.product-information .product-name')
     if not title_node:
         raise Exception('Title not found')
     product['title'] = title_node.text.strip()

     # code N/A
     # price_cny
     price_node = of_utils.find_element_by_css_selector(
         driver, '.product-information .product-price .p-price')
     if price_node:
         product['price_cny'] = of_utils.convert_price(
             price_node.text.strip())

     # images
     image_nodes = of_utils.find_elements_by_css_selector(
         driver, '.product-information .product-details-image-gallery img')
     sources = []
     for node in image_nodes:
         sources.append(node.get_attribute('src').strip())
     product['images'] = ';'.join(sources)

     # detail
     detail_node = of_utils.find_element_by_css_selector(
         driver, '.product-information .product-short-description')
     if detail_node:
         product['detail'] = detail_node.text.strip()
     return product
示例#21
0
 def parse_entry(self, driver):
     """Expand a "list more" listing fully and return its product URLs.

     The list-more button is clicked repeatedly while the current-page
     counter text differs from the total-page counter text. If the button
     or either counter element is missing, expansion stops instead of
     raising on ``None``. Product URLs are rebuilt from each cell's
     ``onclick`` attribute; cells without one are skipped.

     Returns:
         list of absolute URL strings prefixed with 'http://www.etude.cn'.
     """
     of_utils.sleep(5)
     while True:
         btn = of_utils.find_element_by_xpath(
             driver, '//div[contains(@class,"etude-btn-listMore")]')
         total_node = of_utils.find_element_by_xpath(
             driver, '//span[contains(@class,"goods_page_all")]')
         current_node = of_utils.find_element_by_xpath(
             driver, '//span[contains(@class,"goods_page_now")]')
         if not (btn and total_node and current_node):
             break
         if total_node.text.strip() == current_node.text.strip():
             # Counters agree: every page has been loaded.
             break
         driver.execute_script('arguments[0].click();', btn)
         of_utils.sleep(5)

     urls = []
     elements = of_utils.find_elements_by_xpath(
         driver, '//div[@class="product_cell_thumbBox"]')
     for e in elements:
         onclick = e.get_attribute('onclick')
         if not onclick:
             continue
         # Strip the "window.open('...')" wrapper to leave the site path.
         path = onclick.strip().replace(
             'window.open(\'', '').replace('\')', '')
         urls.append('http://www.etude.cn' + path)
     return urls
示例#22
0
 def parse_entry(self, driver):
     """Return product links, clicking the view-all link first if present.

     Returns:
         list of stripped ``href`` strings of the thumbnail links.
     """
     view_all = of_utils.find_element_by_css_selector(
         driver, '.view-all li a')
     if view_all:
         driver.execute_script('arguments[0].click();', view_all)
         # The pause only happens when the link was clicked.
         of_utils.sleep(4)
     thumb_links = of_utils.find_elements_by_css_selector(
         driver, "#search-result-items li a.thumb-link")
     urls = []
     for link in thumb_links:
         urls.append(link.get_attribute('href').strip())
     return urls
示例#23
0
 def parse_entry(self, driver):
     """Scroll down, click the show-all pagination link and return links.

     Returns:
         list of stripped ``href`` strings of the product thumbnail links.
     """
     driver.execute_script('window.scrollBy(0, document.body.scrollHeight);')
     show_all = of_utils.find_element_by_xpath(
         driver, '//a[@class="pagination__all"]')
     if show_all:
         driver.execute_script('arguments[0].click();', show_all)
         # The pause only happens when the link was clicked.
         of_utils.sleep(5)
     thumb_links = of_utils.find_elements_by_xpath(
         driver, '//div[@class="product-image"]//a[@class="thumb-link"]')
     urls = []
     for link in thumb_links:
         urls.append(link.get_attribute('href').strip())
     return urls
示例#24
0
 def parse_entry(self, driver):
     """Scroll the listing three times, then return the product links.

     Returns:
         list of stripped ``href`` strings of the product tile anchors.
     """
     scroll_passes = 3
     for _ in range(scroll_passes):
         driver.execute_script(
             'window.scrollBy(0, document.body.scrollHeight);')
         of_utils.sleep(4)
     tile_links = of_utils.find_elements_by_css_selector(
         driver,
         'div#search-result-items > div.grid-tile >  div.product-tile > a')
     urls = []
     for link in tile_links:
         urls.append(link.get_attribute('href').strip())
     return urls
示例#25
0
 def parse_product(self, driver):
     """Switch the page to the zh_TW locale and scrape the product.

     Only the title is mandatory. A missing description, an empty price
     text or a thumbnail without ``src`` is skipped instead of raising.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when the title element is missing.
     """
     # Switch language
     # NOTE(review): neither lookup below is checked; if the selector or
     # the zh_TW entry is missing, the click script receives None --
     # confirm whether that should keep failing loudly.
     btn = of_utils.find_element_by_css_selector(
         driver, 'span.b-country_language_selector-title')
     driver.execute_script('arguments[0].click();', btn)
     of_utils.sleep(3)
     link = of_utils.find_element_by_css_selector(
         driver,
         'li.b-language_selector-language_item[data-locale=zh_TW] > a')
     driver.execute_script('arguments[0].click();', link)
     of_utils.sleep(5)
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'span.b-product_name')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     # code: the text after the last ':' of the element
     element = of_utils.find_element_by_css_selector(
         driver, 'div.b-product_master_id')
     if element:
         product['code'] = element.text.split(':')[-1].strip()
     # price_cny
     element = of_utils.find_element_by_css_selector(
         driver,
         'h2.b-product_container-price > div.b-product_price > h4.b-product_price-standard'
     )
     if element:
         # Drop the first character (the leading ¥) and every '.'.
         price_text = element.text.strip()[1:].strip().replace('.', '')
         if price_text:
             product['price_cny'] = int(float(price_text))
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver, 'ul.js-thumbnails > li > img')
     images = []
     for element in elements:
         src = element.get_attribute('src')
         if src:
             images.append(src.strip())
     product['images'] = ';'.join(images)
     # detail: inner HTML with <i> tags removed and <br> turned into line
     # breaks; blank lines are dropped
     element = of_utils.find_element_by_css_selector(
         driver, 'div.b-product_long_description')
     if element:
         text = element.get_attribute('innerHTML').strip()
         text = text.replace('<i>', '').replace('</i>', '')
         text = text.replace('<br>', '\n')
         detail_texts = [
             line.strip() for line in text.split('\n') if line.strip()
         ]
         product['detail'] = '\n'.join(detail_texts)
     return product
示例#26
0
 def parse_entry(self, driver):
     """Click every show-all control, then return the product links.

     Returns:
         list of stripped ``href`` strings of the product image anchors.
     """
     show_all_buttons = of_utils.find_elements_by_css_selector(
         driver, '.page-show-all')
     # A falsy lookup result means there is nothing to click.
     for button in show_all_buttons or []:
         driver.execute_script('arguments[0].click();', button)
         of_utils.sleep(5)

     image_links = of_utils.find_elements_by_css_selector(
         driver, 'a.product-image')
     urls = []
     for link in image_links:
         urls.append(link.get_attribute('href').strip())
     return urls
示例#27
0
 def parse_entry(self, driver):
     """Click every shelf view-all link, then return the product links.

     Returns:
         list of stripped ``href`` strings of the product anchors.
     """
     view_all_links = of_utils.find_elements_by_css_selector(
         driver, 'a.shelf_view-all')
     for view_all in view_all_links:
         driver.execute_script('arguments[0].click();', view_all)
         of_utils.sleep(4)
     product_links = of_utils.find_elements_by_css_selector(
         driver, 'div.product_container > a')
     urls = []
     for link in product_links:
         urls.append(link.get_attribute('href').strip())
     return urls
示例#28
0
 def parse_product(self, driver):
     """Scrape a product page that exists in two alternative layouts.

     Each field is looked up with a primary selector and, when that finds
     nothing, with a fallback selector. Only the title is mandatory; a
     description missing in both layouts or an image without ``src`` is
     skipped instead of raising on ``None``.

     Returns:
         A copy of ``of_spider.empty_product`` with the scraped fields set.

     Raises:
         Exception: 'Title not found' when neither title selector matches.
     """
     of_utils.sleep(12)  # Sleep for loading
     product = of_spider.empty_product.copy()
     # title
     element = of_utils.find_element_by_css_selector(
         driver, 'div.productInfo > h1.productName > div > span.modelName')
     if not element:
         element = of_utils.find_element_by_css_selector(
             driver, 'div.product-tit > h1')
     if not element:
         raise Exception('Title not found')
     product['title'] = element.text.strip()
     if not product['title']:
         # The visible text is empty; fall back to the raw inner HTML.
         product['title'] = element.get_attribute('innerHTML')
     # code N/A
     # price_cny
     element = of_utils.find_element_by_css_selector(
         driver, 'div.productInfo > div#itemPrice')
     if not element:
         element = of_utils.find_element_by_css_selector(
             driver, 'div.product-handle > div.product-price')
     if element:
         price_text = element.text.strip()
         if price_text:
             # Drop the first character (the leading ¥).
             price_text = price_text[1:].strip().replace(',', '')
             product['price_cny'] = int(float(price_text))
     # images
     elements = of_utils.find_elements_by_css_selector(
         driver,
         'div.itempage-images-content > ul.alternativeImages > li > img')
     use_fallback = not elements
     if use_fallback:
         elements = of_utils.find_elements_by_css_selector(
             driver,
             'div.thumbnails-box > div > ul.swiper-wrapper > li > img')
     images = []
     for element in elements:
         src = element.get_attribute('src')
         if not src:
             continue
         src = src.strip()
         if use_fallback:
             # Only the fallback layout gets the size marker rewritten.
             src = src.replace('110X110', '500X500')
         images.append(src)
     product['images'] = ';'.join(images)
     # detail
     element = of_utils.find_element_by_css_selector(
         driver, 'div.description > div.descriptionContent')
     if element:
         product['detail'] = element.text.strip()
     else:
         element = of_utils.find_element_by_css_selector(
             driver, 'div.product-description > p')
         if element:
             # Fallback layout: keep only the first line of the paragraph.
             product['detail'] = element.text.split('\n')[0].strip()
     return product
示例#29
0
    def parse_entry(self, driver):
        """Click the list-all control if present and return product links.

        Returns:
            list of stripped ``href`` strings read from the product card
            photo elements.
        """
        list_all = of_utils.find_element_by_css_selector(
            driver, '.c-product-cards-list-all')
        if list_all:
            driver.execute_script('arguments[0].click();', list_all)
            # The pause only happens when the control was clicked.
            of_utils.sleep(4)

        cards = of_utils.find_elements_by_css_selector(
            driver, '.c-product-cards-list-item .c-product-cards-photo-img')
        urls = []
        for card in cards:
            urls.append(card.get_attribute('href').strip())
        return urls
示例#30
0
 def parse_entry(self, driver):
     """Click the load-more link until it is gone, then return the links.

     Returns:
         list of stripped ``href`` strings of the thumbnail links.
     """
     load_more = of_utils.find_element_by_css_selector(driver, 'a.load-more')
     while load_more:
         driver.execute_script('arguments[0].click();', load_more)
         of_utils.sleep(3)
         # Look the link up again; the loop ends once it is not found.
         load_more = of_utils.find_element_by_css_selector(
             driver, 'a.load-more')
     thumb_links = of_utils.find_elements_by_css_selector(
         driver, 'div.grid-cell > a.thumb-link')
     urls = []
     for link in thumb_links:
         urls.append(link.get_attribute('href').strip())
     return urls