Пример #1
0
 def test_header_about_submenu_company_redirect(self):
     """Hover the ABOUT header entry, click its COMPANY submenu item and
     verify the browser lands on the expected path."""
     hover_target = self.getAboutLink()
     chains(self.driver).move_to_element(hover_target).perform()
     company_item = self.getSubmenuAbout()[0]
     landed_path = self.clickEvent(company_item)
     expected_path = self.CONST.get('SUBMENU_COMPANY')
     assert landed_path == expected_path, 'submenu COMPANY link is not working'
Пример #2
0
 def test_header_about_submenu_testimonial_redirect(self):
     """Hover the ABOUT header entry, click its TESTIMONIALS submenu item
     and verify the resulting path."""
     hover_target = self.getAboutLink()
     chains(self.driver).move_to_element(hover_target).perform()
     testimonials_item = self.getSubmenuAbout()[2]
     landed_path = self.clickEvent(testimonials_item)
     expected_path = self.CONST.get('SUBMENU_TESTIMONIALS')
     assert landed_path == expected_path, 'submenu TESTIMONIALS link is not working'
Пример #3
0
 def test_header_resources_submenu_agileTools_redirect(self):
     """Hover the RESOURCES header entry, click its AGILE TOOLS submenu
     item and verify the resulting path."""
     hover_target = self.getResourcesLink()
     chains(self.driver).move_to_element(hover_target).perform()
     tools_item = self.getSubmenuResources()[3]
     landed_path = self.clickEvent(tools_item)
     expected_path = self.CONST.get('SUBMENU_AGILE_TOOLS')
     assert landed_path == expected_path, 'submenu AGILE TOOLS link is not working'
Пример #4
0
 def test_header_about_submenu_careers_redirect(self):
     """Hover the ABOUT header entry, click its CAREERS submenu item and
     verify the resulting path."""
     hover_target = self.getAboutLink()
     chains(self.driver).move_to_element(hover_target).perform()
     careers_item = self.getSubmenuAbout()[3]
     landed_path = self.clickEvent(careers_item)
     expected_path = self.CONST.get('SUBMENU_CAREERS')
     assert landed_path == expected_path, 'submenu CAREERS link is not working'
Пример #5
0
 def test_header_training_submenu_agileFramework_redirect(self):
     """Hover the TRAINING header entry, click its AGILE FRAMEWORK submenu
     item and verify the resulting path."""
     hover_target = self.getTrainingLink()
     chains(self.driver).move_to_element(hover_target).perform()
     framework_item = self.getSubmenuTraining()[0]
     landed_path = self.clickEvent(framework_item)
     expected_path = self.CONST.get('SUBMENU_AGILEFRAMEWORK')
     assert landed_path == expected_path, 'submenu AGILE FRAMEWORK link is not working'
Пример #6
0
 def test_header_training_submenu_agileCulture_redirect(self):
     """Hover the TRAINING header entry, click its AGILE CULTURE submenu
     item and verify the resulting path."""
     hover_target = self.getTrainingLink()
     chains(self.driver).move_to_element(hover_target).perform()
     culture_item = self.getSubmenuTraining()[2]
     landed_path = self.clickEvent(culture_item)
     expected_path = self.CONST.get('SUBMENU_AGILECULTURE')
     assert landed_path == expected_path, 'submenu AGILE CULTURE link is not working'
Пример #7
0
 def test_header_training_submenu_agilepractices_redirect(self):
     """Hover the TRAINING header entry, click its AGILE PRACTICES AND
     TOOLS submenu item and verify the resulting path."""
     hover_target = self.getTrainingLink()
     chains(self.driver).move_to_element(hover_target).perform()
     practices_item = self.getSubmenuTraining()[1]
     landed_path = self.clickEvent(practices_item)
     expected_path = self.CONST.get('SUBMENU_AGILEPRACTICES_TOOLS')
     assert landed_path == expected_path, 'submenu AGILE PRACTICES and tools link is not working'
Пример #8
0
 def test_header_resources_submenu_scrumEvents_redirect(self):
     """Hover the RESOURCES header entry, click its SCRUM EVENTS PLAYBOOK
     submenu item and verify the resulting path."""
     hover_target = self.getResourcesLink()
     chains(self.driver).move_to_element(hover_target).perform()
     scrum_events_item = self.getSubmenuResources()[0]
     landed_path = self.clickEvent(scrum_events_item)
     expected_path = self.CONST.get('SUBMENU_SCRUM_EVENTS')
     assert landed_path == expected_path, 'submenu SCRUM EVENTS PLAYBOOK link is not working'
Пример #9
0
 def test_header_resources_submenu_agilePresentation_redirect(self):
     """Hover the RESOURCES header entry, click its AGILE PRESENTATIONS
     submenu item and verify the resulting path."""
     hover_target = self.getResourcesLink()
     chains(self.driver).move_to_element(hover_target).perform()
     presentations_item = self.getSubmenuResources()[2]
     landed_path = self.clickEvent(presentations_item)
     expected_path = self.CONST.get('SUBMENU_AGILE_PRESENTATIONS')
     assert landed_path == expected_path, 'submenu AGILE PRESENTATIONS link is not working'
Пример #10
0
 def buscar_categoria(self, categoria, subcategoria):
     """Navigate the category mega-menu and open a subcategory.

     Hovers the main categories link, hovers the entry whose text contains
     *categoria*, then clicks the entry whose text contains *subcategoria*.

     :param categoria: visible text of the category to hover.
     :param subcategoria: visible text of the subcategory to click.
     :returns: None.

     NOTE(review): *categoria*/*subcategoria* are interpolated into an
     XPath string literal, so values containing a double quote would break
     the locator — acceptable for trusted test data only.
     """
     driver = self.driver
     actions = chains(driver)
     categorias = driver.find_element_by_class_name(
         "nav-menu-categories-link")
     actions.move_to_element(categorias).perform()
     # Hover the matching category so its submenu opens.
     sub_cat = driver.find_element_by_xpath(
         '//a[contains(text(), "{}")]'.format(categoria))
     actions.move_to_element(sub_cat).perform()
     # Fix: the original assigned the result of .click() (always None) to
     # an unused variable; just perform the click.
     driver.find_element_by_xpath(
         '//a[contains(text(), "{}")]'.format(subcategoria)).click()
Пример #11
0
 def test_header_trianing_submenu(self):
     """Hover the TRAINING header entry and check that all three of its
     submenu links are visible."""
     menu_link = self.getTrainingLink()
     chains(self.driver).move_to_element(menu_link).perform()
     framework, practices, culture = self.getSubmenuTraining()
     assert framework.is_displayed(), 'submenu AGILE FRAMEWORK is not being displayed'
     assert practices.is_displayed(), 'submenu AGILE PRACTICES is not being displayed'
     assert culture.is_displayed(), 'submenu AGILE CULTURE is not being displayed'
Пример #12
0
 def test_header_about_submenu(self):
     """Hover the ABOUT header entry and check that all four of its
     submenu links are visible."""
     menu_link = self.getAboutLink()
     chains(self.driver).move_to_element(menu_link).perform()
     company, partners, testimonials, careers = self.getSubmenuAbout()
     assert company.is_displayed(), 'submenu COMPANY is not being displayed'
     assert partners.is_displayed(), 'submenu PARTNERS is not being displayed'
     assert testimonials.is_displayed(), 'submenu TESTIMONIALS is not being displayed'
     assert careers.is_displayed(), 'submenu CAREERS is not being displayed'
Пример #13
0
 def test_header_resources_submenu(self):
     """Check that all four RESOURCES submenu links become visible after
     hovering the RESOURCES header entry.

     Note the ordering: the submenu elements are looked up *before* the
     hover, matching the original test's sequence.
     """
     menu_link = self.getResourcesLink()
     scrum_events, videos, presentations, tools = self.getSubmenuResources()
     chains(self.driver).move_to_element(menu_link).perform()
     assert scrum_events.is_displayed(), 'submenu SCRUM EVENTS PLAYBOOK is not being displayed'
     assert videos.is_displayed(), 'submenu AGILE VIDEOS is not being displayed'
     assert presentations.is_displayed(), 'submenu AGILE PRESENTATIONS is not being displayed'
     assert tools.is_displayed(), 'submenu AGILE TOOLS is not being displayed'
Пример #14
0
def fill_from(object, strAction):
    """Best-effort auto-filler for a Selenium-driven web form.

    Fills every <select>, <label>, <input> and <textarea> on the current
    page with a plausible dummy value (chosen by placeholder / aria-label /
    aria-labelledby heuristics), clicks through the preview button, clicks
    the button whose caption matches ``strAction``, confirms the modal in
    the first window, and returns the list of values that were entered.

    Parameters:
        object: the Selenium WebDriver instance.
            NOTE(review): the name shadows the ``object`` builtin —
            consider renaming to ``driver``.
        strAction (str): caption of the action button to press (it is
            capitalized before the lookup) and the text typed into the
            confirmation modal.

    Returns:
        list: the dummy values typed into the form, or the string
        ``'error'`` if the confirmation modal could not be clicked.

    NOTE(review): the bare ``except:`` clauses make every step
    best-effort (one bad field must not abort the run), but they also
    swallow real failures — including KeyboardInterrupt.
    """
    print("fill form")
    browser = object  # the WebDriver passed by the caller
    listinput = []  # values entered into the form, returned to the caller

    # Open the clearing-house dropdown and select its first entry.
    try:
        dropdownbox = browser.find_element_by_css_selector(
            "#clearing-house-dropdown-menu-button")
        dropdownbox.click()
        sleep(5)
        dropdownboxitems = browser.find_element_by_css_selector(
            "#participant-info-section > div:nth-child(1) > div > div.clearing-house.selector.show.open > div > a:nth-child(1)"
        )
        dropdownboxitems.click()
    except:
        print('dropdownbox error')

    # First pass over <select> elements: choose option index 1 in each.
    selects = browser.find_elements_by_tag_name("select")
    print('len(selects)')
    print(len(selects))
    for arg in selects[0:]:
        try:
            Select(arg).select_by_index(1)
        except:
            print('Select error')

    # Click every <label> (presumably toggles the checkboxes/radios the
    # labels are bound to — TODO confirm against the form markup).
    labels = browser.find_elements_by_tag_name("label")
    print('len(labels)')
    print(len(labels))

    for arg in labels[0:]:
        try:
            arg.click()
        except:
            print('labels')

    # Fill each <input>, dispatching on placeholder / aria attributes.
    es = browser.find_elements_by_tag_name("input")
    print('len(input)')
    print(len(es))

    for arg in es[0:]:
        try:
            # Skip inputs that already carry a value.
            if (arg.get_attribute('value') != ''):
                print(arg.get_attribute('value'))
                continue
        except Exception as argx:
            print(argx)
        # Email address field.
        if ((arg.get_attribute('placeholder') == 'Type your answer here...')
                and ((arg.get_attribute('aria-label') == 'Email Address') or
                     (arg.get_attribute('type') == 'email'))):
            try:
                arg.send_keys('*****@*****.**')
                listinput.append('*****@*****.**')
            except:
                print(arg)
        # Short free-text field (maxlength 50): filled but deliberately
        # not recorded in listinput.
        elif (arg.get_attribute('placeholder') == 'Type your answer here...'
              ) and (arg.get_attribute('maxlength') == '50'):
            try:
                arg.send_keys('*****@*****.**')
                #listinput.append('*****@*****.**')
            except:
                print(arg)
        # Telephone number field.
        elif (
            (arg.get_attribute('placeholder') == 'Type your answer here...') or
            (arg.get_attribute('placeholder')
             == 'Type telephone number here...')
        ) and (
            (arg.get_attribute('aria-label') == 'Telephone Number') or
            (arg.get_attribute('aria-labelledby') ==
             'guideContainer-rootPanel-contactInformation-contactPersons-contact-phone___guideFieldShortDescription'
             )):
            try:
                arg.send_keys('123-1234-1234')
                listinput.append('123-1234-1234')
            except:
                print(arg)
        # Login user-id field.
        elif (
                arg.get_attribute('placeholder') == 'Type your answer here...'
        ) and (arg.get_attribute(
                'aria-labelledby'
        ) == 'guideContainer-rootPanel-contactinformationse-contactInformationFragment-confirmationDetailsWrapper-loginuserid___guideFieldShortDescription'
               ):
            try:
                arg.send_keys('BO_C88888')
                listinput.append('BO_C88888')
            except:
                print(arg)
        # Date field: types today + 7 days as dd-mmm-yyyy.
        # NOTE(review): this branch also matches placeholder 'dd-mmm-yy',
        # which makes the next elif unreachable dead code.
        elif (arg.get_attribute('placeholder')
              == 'dd-mmm-yyyy') or (arg.get_attribute('placeholder')
                                    == 'dd-mmm-yy'):
            try:
                arg.send_keys(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y"))
                listinput.append(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y").lower())
            except:
                print(arg)
        # NOTE(review): dead branch — shadowed by the previous elif above.
        elif (arg.get_attribute('placeholder') == 'dd-mmm-yy'):
            try:
                arg.send_keys(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%y"))
                listinput.append(
                    (datetime.datetime.now() +
                     datetime.timedelta(days=7)).strftime("%d-%b-%Y").lower())
            except:
                print(arg)
        # Stock code: type 600, wait for the suggestion table, click the
        # matching cell via ActionChains.
        elif (arg.get_attribute('aria-label') == 'Stock Code'):
            try:
                arg.send_keys(600)
                sleep(3)
                menu = browser.find_element_by_xpath(
                    "//*[@id=\"guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-stockCode__\"]/table/tbody/tr/td[2]"
                )
                actions = chains(browser)
                #actions.move_to_element(menu)
                #sleep(3)
                actions.move_to_element(menu).click().perform()
                listinput.append(600)
            except Exception as e:
                print(e)
        # Stock name: nothing is typed (presumably auto-populated by the
        # stock-code pick above — TODO confirm); only the expected value
        # is recorded.
        elif (arg.get_attribute('aria-label') == 'Stock Name'):
            try:
                #arg.send_keys('CHINA INFRASTRUCTURE INVESTMENT LIMITED')
                listinput.append('CHINA INFRASTRUCTURE INVESTMENT LIMITED')
            except:
                print(arg)
        # Settlement position number field.
        elif (arg.get_attribute(
                'aria-labelledby'
        ) == 'guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-settlementPosNo___guideFieldShortDescription'
              ):
            try:
                arg.send_keys('P12345678')
                listinput.append('P12345678')
            except:
                print(arg)
        # Share quantity field.
        elif (arg.get_attribute(
                'aria-labelledby'
        ) == 'guideContainer-rootPanel-detailsOfFailedDeliveryPositionSection-detailsOfFailedDeliveryPositionFragment-shareQuantityApplied___guideFieldShortDescription'
              ):
            try:
                arg.send_keys('12345678')
                listinput.append('12345678')
            except:
                print(arg)
        # Any other free-text answer box: record the value the browser
        # actually holds after typing.
        elif (arg.get_attribute('placeholder') == 'Type your answer here...'):
            try:
                #numberrx =random.randint(12345678901234567890,123456789012345678901234567890)
                arg.send_keys("12345678")
                listinput.append(arg.get_attribute('value'))
            except Exception as exa:
                print(exa)
        # Fallback: anything unrecognized gets a literal '1'.
        else:
            try:
                arg.send_keys('1')
            except:
                print(arg)

    # Fill every <textarea> with a 110-character filler string.
    textareas = browser.find_elements_by_tag_name("textarea")
    print('len(textareas)')
    print(len(textareas))
    for arg in textareas[0:]:
        try:
            arg.send_keys(
                'abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij'
            )
            listinput.append(arg.get_attribute('value'))
        except:
            print('textareas error')

    # Second pass over <select> elements (presumably some appear only
    # after earlier fields are filled — TODO confirm).
    selects = browser.find_elements_by_tag_name("select")
    print('len(selects)')
    print(len(selects))
    for arg in selects[0:]:
        try:
            Select(arg).select_by_index(1)
        except:
            print('Select error')

    # Record the aria-labels of any inputs now marked aria-selected.
    for arg in es[0:]:
        try:
            print(arg.get_attribute('aria-selected'))
            if arg.get_attribute('aria-selected') == 'true':
                listinput.append(arg.get_attribute('aria-label').lower())
                print(arg.get_attribute('value'))
        except:
            print('radio')
    print(listinput)

    # Step through the preview button three times with waits in between.
    try:
        browser.find_element_by_id("preview-btn").click()
        sleep(3)
        browser.find_element_by_id("preview-btn").click()
        sleep(3)
        browser.find_element_by_id("preview-btn").click()
        sleep(5)
    except:
        print('preview error')

    # Click the action button twice (the element is re-located before the
    # second click — presumably the page re-renders; verify).
    try:
        buttons = browser.find_element_by_xpath("//button[text()='" +
                                                strAction.capitalize() + "']")
        buttons.click()
        sleep(3)
        vbuttons = browser.find_element_by_xpath("//button[text()='" +
                                                 strAction.capitalize() + "']")
        vbuttons.click()
        sleep(3)
    except:
        print('action error ' + strAction)
    # Switch back to the first window and confirm the modal there.
    handles = browser.window_handles
    browser.switch_to.window(handles[0])
    try:
        browser.find_element_by_id("message-text").send_keys(strAction)
        browser.find_element_by_css_selector(
            "div.modal-footer > button:nth-child(2)").click()
    except:
        print("cant click")
        return 'error'
    sleep(15)
    return listinput
Пример #15
0
 def close_tab(self):
     """Close the current browser tab by sending the Ctrl+W shortcut."""
     shortcut = chains(self.driver)
     shortcut.key_down(Keys.CONTROL)
     shortcut.send_keys('w')
     shortcut.key_up(Keys.CONTROL)
     shortcut.perform()
Пример #16
0
    def parse(self, response):
        """Scrapy callback: crawl product data from the sivillage mall
        with the shared Selenium driver.

        Spoofs several ``navigator``/WebGL properties (anti-bot evasion),
        reads the left-hand category menu, and for every category except
        the first pages through the product grid.  Each product detail
        page is scraped for name, price, description and per-color
        size/image data, persisted via ``__record_data__``; products whose
        number is already in ``self.progress`` are skipped.  Paging
        advances via the numeric page links or the "Next" button until
        the final page.
        """
        self.driver.get(response.url)
        # Anti-bot evasion: fake the plugin list, language list and WebGL
        # vendor/renderer strings reported by the headless browser.
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
        )
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
        )
        self.driver.execute_script(
            "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
        )

        # Collect the category names from the left menu.
        self.sel = scrapy.Selector(text=self.driver.page_source)
        cat_list = self.sel.xpath(
            '//*[@id="leftMenu"]/div/div/div/ul/li/a/text()').extract()
        for ci, c in enumerate(cat_list):
            cat_list[ci] = c.strip()

        for catidx, category in enumerate(cat_list):
            # Skip the first menu entry (presumably not a product
            # category — TODO confirm).
            if catidx < 1:
                continue
            xpath = '//*[@id="leftMenu"]/div/div/div/ul/li[{}]/a'.format(
                str(catidx + 1))

            # click category
            try:
                actions = chains(self.driver)
                xpath = '//*[@id="leftMenu"]/div/div/div/ul/li[{}]/a'.format(
                    str(catidx + 1))
                cat = self.driver.find_element_by_xpath(xpath)
                actions.move_to_element(cat).perform()
                to_be_clicked = WebDriverWait(self.driver, 120).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            except TimeoutException:
                # NOTE(review): the message says "color" but this is the
                # category click — looks copy-pasted.
                print("\n\nERROR WHILE CLICKING color")
                continue
            self.driver.execute_script('arguments[0].click()', to_be_clicked)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)
            print('\n\n {} clicked\n\n'.format(category))

            curr_page_num = 1
            while True:
                # crawl items on the current listing page
                xpath = '//*[@id="productListDiv"]/div/div/ul/li'
                item_lists = self.sel.xpath(xpath).extract()
                for li_idx in range(len(item_lists)):
                    curr_url = self.driver.current_url
                    xpath = '//*[@id="productListDiv"]/div/div/ul/li[{}]/div/div/a/@href'.format(
                        str(li_idx + 1))
                    # The href is a javascript: call whose single-quoted
                    # arguments carry productNo / infwPathTp / infwPathNo.
                    script_res = self.sel.xpath(xpath).extract_first()
                    script_res = script_res.split("'")
                    productNo = script_res[1]
                    # Already scraped: reload the listing and move on.
                    if productNo in self.progress:
                        self.driver.get(curr_url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        continue
                    infwPathTp = script_res[3]
                    infwPathNo = script_res[5]
                    brand = self.sel.xpath(
                        '//*[@id="productListDiv"]/div/div/ul/li[{}]/div[2]/div[1]/span/text()'
                        .format(str(li_idx + 1))).extract_first()
                    # Rebuild the detail-page URL from the extracted ids.
                    url_string = "productNo={}&infwPathTp={}&infwPathNo={}".format(
                        productNo, infwPathTp, infwPathNo)
                    url = 'http://fashion.sivillage.com/product/productDetail?' + url_string
                    self.driver.get(url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)

                    # scrape page
                    name = self.sel.xpath(
                        '//*[@id="content"]/div[1]/div[2]/div[1]/div[4]/text()'
                    ).extract_first()
                    # A missing name means the detail page did not load
                    # as expected: go back to the listing.
                    if name is None:
                        self.driver.get(curr_url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        continue
                    else:
                        name = name.strip()
                    prod_num = self.sel.xpath(
                        '//*[@id="content"]/div[1]/div[2]/div[1]/div[2]/text()'
                    ).extract_first().strip()
                    print('\n\n\n\n\n{}\n\n\n\n\n'.format(prod_num))
                    # Second progress check, this time by product number.
                    if prod_num in self.progress:
                        self.driver.get(curr_url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        continue

                    price = self.sel.xpath(
                        '//*[@id="content"]/div[1]/div[2]/div[1]/div[5]/span/text()'
                    ).extract_first()
                    prod_desc = self.sel.xpath(
                        '//*[@id="content"]/div[1]/div[2]/div[6]/dl[1]/dd/div/div[1]/span[2]/text()'
                    ).extract()
                    if prod_desc is None:
                        prod_desc = ''
                    else:
                        prod_desc = ' '.join(prod_desc)
                    colors = self.sel.xpath(
                        '//*[@id="content"]/div[1]/div[2]/div[4]/div/dl[1]/dd/ul/li/a/img/@alt'
                    ).extract()
                    # color name -> {'img_url': [...], 'size': [...], 'rel': [...]}
                    color = {}

                    for c_idx, c in enumerate(colors):
                        if c not in color:
                            color[c] = {'img_url': [], 'size': [], 'rel': []}
                        else:
                            # Duplicate color swatch: skip it.
                            continue

                        # click color
                        try:
                            actions = chains(self.driver)
                            xpath = '//*[@id="content"]/div[1]/div[2]/div[4]/div/dl[1]/dd/ul/li[{}]/a'.format(
                                str(c_idx + 1))
                            col = self.driver.find_element_by_xpath(xpath)
                            actions.move_to_element(col).perform()
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CLICKING color")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)

                        # Sizes still in stock (non-disabled entries) and
                        # images for the currently selected color.
                        size = self.sel.xpath(
                            '//ul[@class="size"]/li/a[not(contains(@class,"disabled"))]/text()'
                        ).extract()
                        color[c]['size'] = size
                        imgs = self.sel.xpath(
                            '//*[@id="mImgDiv"]/div/a/img/@src').extract()
                        color[c]['img_url'] = imgs

                    # write progress
                    __record_progress__(productNo)
                    # write data
                    __record_data__(url, category, brand, name, price,
                                    prod_num, prod_desc, color)
                    # go back to main page
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)

                # click next page
                page_list = self.sel.xpath(
                    '//div[@class="paging"]/a/text()').extract()
                print(page_list)
                next_page_num = curr_page_num + 1
                if str(next_page_num) not in page_list:
                    if 'Next' in page_list:
                        # click next button; remember the first item's id
                        # so we can detect when the page actually changed.
                        xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                        prev_page_item = self.sel.xpath(xpath).extract_first()
                        while True:
                            xpath = '//div[@class="paging"]/a[contains(text(), "Next")]'
                            try:
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print("\n\nERROR WHILE CILCKING Next Button")
                                continue
                            self.driver.execute_script('arguments[0].click()',
                                                       to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                            curr_item = self.sel.xpath(xpath).extract_first()
                            # First item changed => the next page loaded.
                            if prev_page_item != curr_item:
                                print(
                                    "\n\nNEXT BUTTON CLICKED: page now {}\n\n".
                                    format(str(curr_page_num + 1)))
                                curr_page_num += 1
                                break
                    else:
                        # final page
                        break
                else:
                    # click next_page (numeric paging link); same
                    # change-detection trick as the Next button above.
                    xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                    prev_page_item = self.sel.xpath(xpath).extract_first()
                    while True:
                        xpath = '//div[@class="paging"]/a[contains(text(), "{}")]'.format(
                            str(next_page_num))
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING next page")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        xpath = '//*[@id="productListDiv"]/div/div/ul/li[1]/div/div/a/@data-prod-no'
                        curr_item = self.sel.xpath(xpath).extract_first()
                        if prev_page_item != curr_item:
                            print("\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                  format(str(curr_page_num + 1)))
                            curr_page_num += 1
                            break
Пример #17
0
    def parse(self, response):
        self.driver.get(response.url)
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
        )
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
        )
        self.driver.execute_script(
            "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
        )

        upper_class_index = [3, 4, 5, 6, 7]

        for upidx in upper_class_index:
            subclass_xpath = '//*[@id="cate_m_main"]/li[3]/div/div/ul/li[{}]/ul/li/a/text()'.format(
                str(upidx))
            subclass_lists = response.xpath(subclass_xpath).extract()

            for subidx, subclass in enumerate(subclass_lists):
                xpath = '//*[@id="cate_m_main"]/li[3]/div/div/ul/li[{}]/ul/li[{}]/a'.format(
                    str(upidx), str(subidx + 1))

                # click sub category
                while True:
                    # click sub category
                    try:
                        actions = chains(self.driver)
                        man = self.driver.find_element_by_xpath(
                            '//*[@id="cate_m_main"]/li[3]/a[@href="/ko/item/me"]'
                        )
                        actions.move_to_element(man).perform()
                        to_be_clicked = WebDriverWait(self.driver, 120).until(
                            EC.element_to_be_clickable((By.XPATH, xpath)))
                    except TimeoutException:
                        print("\n\nERROR WHILE CLICKING {}".format(subclass))
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    xpath = '//*[@id="bodyWrap"]/h3/span/a[3]/text()'
                    check = self.sel.xpath(xpath).extract_first()
                    if check == subclass:
                        break
                print(subclass + ' clicked\n')

                last_page = False
                urls = []
                while True:
                    xpath = '//*[@id="listBody"]/li'
                    lists = self.sel.xpath(xpath).extract()
                    for li_idx in range(len(lists)):
                        curr_url = self.driver.current_url
                        xpath = '//*[@id="listBody"]/li[{}]/div/a[1]/@href'.format(
                            str(li_idx + 1))
                        rel_url = self.sel.xpath(xpath).extract_first()
                        url = urllib.parse.urljoin(
                            'http://www.thehandsome.com/', rel_url)
                        self.driver.get(url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        # check page_source changes
                        #scrape page
                        # category, brand, name, price, prod_num, prod_desc, color, size, img, url
                        category = subclass
                        brand = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[1]/h4/a/span/span/text()'
                        ).extract_first()
                        name = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[1]/h4/span/text()'
                        ).extract_first().strip()
                        price = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[1]/p[1]/span/text()'
                        ).extract_first()[1:]
                        prod_num = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[1]/p[2]/text()'
                        ).extract_first().strip()
                        if prod_num in self.progress:
                            self.driver.get(curr_url)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            continue

                        xpath = '//*[@id="contentDiv"]/div[1]/div[1]/p[3]/text()'
                        prod_desc = self.sel.xpath(xpath).extract_first()
                        if prod_desc is None:
                            prod_desc = ''
                        else:
                            prod_desc = prod_desc.strip()
                        colors = self.sel.xpath(
                            '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[1]/div/ul/li/a/@onmouseover'
                        ).extract()
                        color = {}
                        # {'brand', 'name', 'product_num'}

                        for c_idx, c in enumerate(colors):
                            c = c.split("'")[1]
                            if c not in color:
                                color[c] = {
                                    'img_url': [],
                                    'size': [],
                                    'rel': []
                                }
                            else:
                                continue
                            # click color
                            try:
                                actions = chains(self.driver)
                                xpath = '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[1]/div/ul/li[{}]/a'.format(
                                    str(c_idx + 1))
                                col = self.driver.find_element_by_xpath(xpath)
                                actions.move_to_element(col).perform()
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print("\n\nERROR WHILE CLICKING color")
                                continue
                            self.driver.execute_script('arguments[0].click()',
                                                       to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)

                            # size and img by each color
                            size = self.sel.xpath(
                                '//*[@id="contentDiv"]/div[1]/div[3]/ul/li[2]/span[2]/ul/li/a[not(@class)]/text()'
                            ).extract()
                            color[c]['size'] = size
                            imgs = self.sel.xpath(
                                '//*[@id="imageDiv"]/ul/li/img/@src').extract(
                                )
                            color[c]['img_url'] = imgs

                            # check existance of relevant item
                            try:
                                xpath = '//div[@class="related_evt"]'
                                self.driver.find_element_by_xpath(xpath)
                            except NoSuchElementException:
                                # continue to next color
                                continue

                            # go to relevant item
                            item_li = self.sel.xpath(
                                '//*[@id="referencesListContent"]/ul').extract(
                                )
                            for itemidx in range(len(item_li)):
                                try:
                                    actions = chains(self.driver)
                                    xpath = '//*[@id="referencesListContent"]/ul/li[{}]/a'.format(
                                        str(itemidx + 1))
                                    item = self.driver.find_element_by_xpath(
                                        xpath)
                                    actions.move_to_element(item).perform()
                                    to_be_clicked = WebDriverWait(
                                        self.driver, 120).until(
                                            EC.element_to_be_clickable(
                                                (By.XPATH, xpath)))
                                except NoSuchElementException:
                                    print(
                                        "\n\nERROR WHILE CLICKING relevant item"
                                    )
                                    continue
                                self.driver.execute_script(
                                    'arguments[0].click()', to_be_clicked)
                                __delay_time__()
                                self.sel = scrapy.Selector(
                                    text=self.driver.page_source)

                                # get brand, name, product_num
                                rel_brand = self.sel.xpath(
                                    '//*[@id="contentDiv"]/div[1]/div[1]/h4/a/span/span/text()'
                                ).extract_first()
                                rel_name = self.sel.xpath(
                                    '//*[@id="contentDiv"]/div[1]/div[1]/h4/span/text()'
                                ).extract_first().strip()
                                rel_prod_num = self.sel.xpath(
                                    '//*[@id="contentDiv"]/div[1]/div[1]/p[2]/text()'
                                ).extract_first().strip()
                                color[c]['rel'].append({
                                    'rel_brand':
                                    rel_brand,
                                    'rel_name':
                                    rel_name,
                                    'rel_prod_num':
                                    rel_prod_num
                                })
                                # go back to original item
                                self.driver.get(url)
                                __delay_time__()
                                self.sel = scrapy.Selector(
                                    text=self.driver.page_source)

                        # write progress
                        __record_progress__(prod_num)
                        # write data
                        __record_data__(url, category, brand, name, price,
                                        prod_num, prod_desc, color)
                        # go back to main page
                        self.driver.get(curr_url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)

                    # click next page
                    xpath = '//*[@id="listBody"]/li[1]/div/a[1]/span[1]/img/@src'
                    prev_page = self.sel.xpath(xpath).extract_first()
                    cnt = 0
                    while True:
                        xpath = '//*[@id="bodyWrap"]/div[1]/div[2]/a[3]'
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING next page")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()

                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        xpath = '//*[@id="listBody"]/li[1]/div/a[1]/span[1]/img/@src'
                        check = self.sel.xpath(xpath).extract_first()
                        if prev_page != check:
                            break
                        cnt += 1
                        if cnt > 4:
                            last_page = True
                            break
                    print("NEXT PAGE CLICKED\n\n")
                    if last_page == True:
                        break
 def hover_to_click_element(self, locator1, locator2):
     """Hover over the element at *locator1*, then click the element at
     *locator2* once it becomes visible (e.g. a submenu revealed on hover)."""
     hover_target = WDW(self.driver, 10).until(
         EC.presence_of_element_located(locator1))
     chains(self.driver).move_to_element(hover_target).perform()
     revealed = WDW(self.driver, 10).until(
         EC.visibility_of_element_located(locator2))
     revealed.click()
Пример #19
0
 def open_tab(self):
     """Open a new browser tab by sending Ctrl+T to the page."""
     actions = chains(self.driver)
     actions.key_down(Keys.CONTROL)
     actions.send_keys('t')
     actions.key_up(Keys.CONTROL)
     actions.perform()
 def hover_to_click(self, locator):
     """Wait for the element at *locator*, hover over it, and click it."""
     target = WDW(self.driver, 10).until(
         EC.presence_of_element_located(locator))
     chains(self.driver).move_to_element(target).click().perform()
Пример #21
0
    Keys.ARROW_DOWN)
driver.find_element_by_xpath("//input[@id='custCity']").send_keys(Keys.RETURN)
time.sleep(3)
driver.find_element_by_xpath("//input[@id='custAddress']").click()
driver.find_element_by_xpath("//input[@id='custAddress']").send_keys("Madhav")
time.sleep(3)
driver.find_element_by_xpath("//input[@id='custAddress']").send_keys(
    Keys.ARROW_DOWN)
driver.find_element_by_xpath("//input[@id='custAddress']").send_keys(
    Keys.RETURN)
time.sleep(3)
driver.find_element_by_xpath("//input[@id='buildOrderBtn']").click()
time.sleep(3)

#MouseOver Action Code
actions = chains(driver)
val1 = driver.find_element_by_xpath("//a[@id='customize_pizza_1']")
actions.move_to_element(val1).perform()
time.sleep(2)
val2 = wait(driver, 10).until(
    EC.element_to_be_clickable((By.XPATH, "//a[@id='quick_add_1']")))
val2.click()
#MouseOver Action End
#Add Product to Cart End

time.sleep(3)
driver.find_element_by_xpath("//a[@id='checkout']").click()
time.sleep(5)
driver.find_element_by_xpath("//a[@id='crossButtonUpselling']").click()
time.sleep(5)
driver.find_element_by_xpath(
Пример #22
0
    def parse(self, response):
        """Crawl products from an ssense.com category with a Selenium browser.

        Flow: click one upper category, click one sub-category, then walk
        the paginated product grid.  For every product page it scrapes
        brand, name, price, SKU, description, images and sizes, follows
        the "Styled with" related products, and persists results through
        the module-level __record_progress__/__record_data__ helpers.

        NOTE(review): the loops deliberately skip everything except upper
        category index 4 (i == 0) and sub-category index 6 (labelled
        "sweater" below) — confirm this narrowing is still intended.
        """
        self.driver.get(response.url)
        # Spoof navigator/WebGL fingerprint properties so the site's
        # headless-browser detection sees a plausible real browser.
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
        )
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
        )
        self.driver.execute_script(
            "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
        )

        # The driver only navigates; all extraction goes through a scrapy
        # Selector rebuilt from the rendered page source after each load.
        self.sel = scrapy.Selector(text=self.driver.page_source)

        for i in range(2):
            if i != 0:
                # Only the first iteration (category list item 4) is crawled.
                continue
            target_idx = str(i + 4)
            # Hover over the upper category so its menu renders, then wait
            # for the link itself to become clickable.
            try:
                actions = chains(self.driver)
                xpath = '//*[@id="category-list"]/li[{}]/a'.format(target_idx)
                upper_cat = self.driver.find_element_by_xpath(xpath)
                actions.move_to_element(upper_cat).perform()
                to_be_clicked = WebDriverWait(self.driver, 120).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            except TimeoutException:
                print("\n\nTimeoutExceptoin while clicking upper category {}".
                      format(target_idx))
                continue
            # Click via JavaScript to sidestep overlay/interception issues.
            self.driver.execute_script('arguments[0].click()', to_be_clicked)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)
            print('\n\n upper category {} clicked\n\n'.format(target_idx))

            cat_list = self.sel.xpath(
                '//*[@id="category-list"]/li[{}]/ul/li/a/text()'.format(
                    target_idx)).extract()
            cat_list = [cat.strip() for cat in cat_list]

            for catidx, cat in enumerate(cat_list):
                if catidx != 6:
                    # sweater
                    continue
                try:
                    actions = chains(self.driver)
                    xpath = '//*[@id="category-list"]/li[{}]/ul/li[{}]/a'.format(
                        target_idx, str(catidx + 1))
                    category = self.driver.find_element_by_xpath(xpath)
                    actions.move_to_element(category).perform()
                    to_be_clicked = WebDriverWait(self.driver, 120).until(
                        EC.element_to_be_clickable((By.XPATH, xpath)))
                except TimeoutException:
                    print("\n\nTimeoutExceptoin while clicking category {}".
                          format(cat))
                    continue
                self.driver.execute_script('arguments[0].click()',
                                           to_be_clicked)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                print('\n\n category {} clicked\n\n'.format(cat))

                curr_page_num = 1
                while True:
                    # crawl items
                    prod_list = self.sel.xpath(
                        '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                    ).extract()
                    # Remember the listing URL so we can return to it after
                    # visiting each product page.
                    main_prod_page_url = self.driver.current_url
                    for prod in prod_list:
                        url = urljoin('https://www.ssense.com/', prod)
                        if url in self.progress:
                            print("\n\nProduct already in progress {}\n\n".
                                  format(url))
                            continue
                        self.driver.get(url)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)

                        # prod page
                        prod_num = self.sel.xpath(
                            '//span[@class="product-sku"]/text()'
                        ).extract_first()
                        if prod_num is None:
                            prod_num = ''
                        brand = self.sel.xpath(
                            '//h1[@class="product-brand"]/a/text()'
                        ).extract_first()
                        name = self.sel.xpath(
                            '//h2[@class="product-name"]/text()'
                        ).extract_first()
                        price = self.sel.xpath(
                            '//span[@class="price"]/text()').extract_first()
                        prod_desc = self.sel.xpath(
                            '//p[@class="vspace1 product-description-text"]/span/text()'
                        ).extract()
                        prod_desc = ' '.join(prod_desc)
                        # Strip both quote characters from the description —
                        # presumably to keep the downstream record format
                        # intact; verify against __record_data__.
                        prod_desc = prod_desc.replace("'", "")
                        prod_desc = prod_desc.replace('"', '')
                        # A single empty-string key is used for the color map
                        # (no per-color variants are scraped on this site).
                        color = {}
                        c = ''
                        imgs = self.sel.xpath(
                            '//div[@class="product-images-container"]/div/div/div'
                        ).extract()
                        # NOTE(review): pulls the image URL by its quoted-token
                        # position in the raw tag markup — brittle if the
                        # markup's attribute order changes.
                        imgs = [im.split('"')[5] for im in imgs]

                        size = self.sel.xpath(
                            '//*[@id="size"]/option/text()').extract()
                        # Drop the first <option> (placeholder), then keep
                        # only the leading size token of each entry.
                        size = [s.strip() for s in size][1:]
                        size = [s.split()[0] for s in size]

                        color[c] = {'img_url': imgs, 'size': size, 'rel': []}

                        # rel page
                        indicator = self.sel.xpath(
                            '//div[@class="related-product-tab inline-block smartphone-portrait-narrow-full-width"]/a/span[1]/text()'
                        ).extract_first()
                        if indicator == 'Styled with':
                            rels = self.sel.xpath(
                                '//div[@class="related-product-container tab-container"]/div[2]/div/div/div/div/a/@href'
                            ).extract()
                            for rel_url in rels:
                                rel_result = {}
                                rel_url = urljoin('https://www.ssense.com/',
                                                  rel_url)
                                self.driver.get(rel_url)
                                __delay_time__()
                                self.sel = scrapy.Selector(
                                    text=self.driver.page_source)

                                # rel prod page
                                rel_result['rel_brand'] = self.sel.xpath(
                                    '//h1[@class="product-brand"]/a/text()'
                                ).extract_first()
                                rel_result['rel_name'] = self.sel.xpath(
                                    '//h2[@class="product-name"]/text()'
                                ).extract_first()
                                rel_prod_num = self.sel.xpath(
                                    '//span[@class="product-sku"]/text()'
                                ).extract_first()
                                if rel_prod_num is None:
                                    rel_prod_num = ''
                                rel_result['rel_prod_num'] = rel_prod_num
                                rel_prod_desc = self.sel.xpath(
                                    '//p[@class="vspace1 product-description-text"]/span/text()'
                                ).extract()
                                rel_prod_desc = ' '.join(rel_prod_desc)
                                rel_prod_desc = rel_prod_desc.replace('"', '')
                                rel_prod_desc = rel_prod_desc.replace("'", "")
                                rel_result['rel_prod_desc'] = rel_prod_desc

                                color[c]['rel'].append(rel_result)

                        # write progress
                        __record_progress__(url)
                        # write data
                        __record_data__(url, cat, brand, name, price, prod_num,
                                        prod_desc, color)

                    # go back to main page
                    self.driver.get(main_prod_page_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)

                    # click next page
                    page_list = self.sel.xpath(
                        '//div[@class="span16 text-center"]/nav/ul/li/a/text()'
                    ).extract()
                    next_page_num = curr_page_num + 1
                    if str(next_page_num) not in page_list:
                        # final page
                        print("\n\nFinal Page\n\n")
                        break
                    else:
                        # click next page
                        # First product href on the current page; compared
                        # after the click to confirm the grid really changed.
                        prev_page_item = self.sel.xpath(
                            '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                        ).extract_first()
                        break_item = False
                        while True:
                            xpath = '//div[@class="span16 text-center"]/nav/ul/li/a[contains(text(), "{}")]'.format(
                                str(next_page_num))
                            try:
                                element_visible = True
                                actions = chains(self.driver)
                                visible_cnt = 0
                                # Retry locating the pagination link through
                                # up to 3 presence-wait timeouts before
                                # declaring it missing.
                                while True:
                                    try:
                                        clickable = self.driver.find_element_by_xpath(
                                            xpath)
                                        break
                                    except NoSuchElementException:
                                        try:
                                            WebDriverWait(
                                                self.driver, 120
                                            ).until(
                                                EC.presence_of_element_located(
                                                    (By.XPATH, xpath)))
                                            continue
                                        except TimeoutException:
                                            print(
                                                "\n\nElement Not Visible.\n\n")
                                            if visible_cnt < 3:
                                                visible_cnt += 1
                                                continue
                                            else:
                                                element_visible = False
                                                break
                                if not element_visible:
                                    # Pagination link never appeared; abandon
                                    # this category's remaining pages.
                                    break_item = True
                                    print("\n\nMove on to Next Item...\n\n")
                                    break
                                actions.move_to_element(clickable).perform()
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print("\n\nERROR WHILE CILCKING next page")
                                continue
                            # The element can go stale between the wait and
                            # the click; re-resolve it until the click lands.
                            while True:
                                try:
                                    self.driver.execute_script(
                                        'arguments[0].click()', to_be_clicked)
                                    break
                                except StaleElementReferenceException:
                                    to_be_clicked = WebDriverWait(
                                        self.driver, 120).until(
                                            EC.element_to_be_clickable(
                                                (By.XPATH, xpath)))
                                    continue
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            curr_item = self.sel.xpath(
                                '//*[@id="wrap"]/div/div[1]/section/div[1]/div/figure/a/@href'
                            ).extract_first()
                            print('\n\n{}\n\n'.format(curr_item))
                            if prev_page_item != curr_item:
                                print("\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                      format(str(next_page_num)))
                                curr_page_num += 1
                                break
                        if break_item:
                            break
Пример #23
0
 def click_out(self):
     """Click 50px down-right of the current pointer position
     (e.g. to dismiss an open popup or dropdown)."""
     actions = chains(self.driver)
     actions.move_by_offset(50, 50)
     actions.click()
     actions.perform()
Пример #24
0
    def parse(self, response):
        """Crawl products from ssfshop.com with a Selenium browser.

        Clicks each top-nav category (skipping the first nine entries),
        then walks the paginated product grid.  Product pages are opened
        directly via URLs rebuilt from the grid anchors' onclick payload;
        brand, name, price, description, sizes and images are scraped and
        persisted through __record_progress__/__record_data__.
        """
        self.driver.get(response.url)
        # Spoof navigator/WebGL fingerprint properties so the site's
        # headless-browser detection sees a plausible real browser.
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
        )
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
        )
        self.driver.execute_script(
            "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
        )

        self.sel = scrapy.Selector(text=self.driver.page_source)
        cat_list = self.sel.xpath(
            '/html/body/div[3]/nav/ul[1]/li[2]/ul/li/a/text()').extract()

        for catidx, category in enumerate(cat_list):
            if catidx < 9:
                # skip 'All'
                # NOTE(review): this skips the first NINE entries, not just
                # an 'All' item — confirm the intended starting index.
                continue
            # click category
            # Hover so the nav menu renders, then wait until the category
            # link is clickable; click via JS to avoid interception.
            try:
                actions = chains(self.driver)
                xpath = '/html/body/div[3]/nav/ul[1]/li[2]/ul/li[{}]/a'.format(
                    str(catidx + 1))
                cat = self.driver.find_element_by_xpath(xpath)
                actions.move_to_element(cat).perform()
                to_be_clicked = WebDriverWait(self.driver, 120).until(
                    EC.element_to_be_clickable((By.XPATH, xpath)))
            except TimeoutException:
                print("\n\nTimeoutExceptoin while clicking category {}".format(
                    category))
                continue
            self.driver.execute_script('arguments[0].click()', to_be_clicked)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)
            print('\n\n {} clicked\n\n'.format(category))

            curr_page_num = 1
            while True:
                # crawl items
                item_lists = self.sel.xpath(
                    '//*[@id="dspGood"]/li/@data-prdno').extract()
                for li_idx, prod_num in enumerate(item_lists):
                    curr_url = self.driver.current_url
                    # The grid anchor's onclick carries the brand id and
                    # product number as single-quoted arguments; split on
                    # the quote character to recover them.
                    xpath = '//*[@id="dspGood"]/li[{}]/a/@onclick'.format(
                        str(li_idx + 1))
                    script_res = self.sel.xpath(xpath).extract_first().split(
                        "'")
                    brand = script_res[1]
                    prod_num = script_res[3]
                    if prod_num in self.progress:
                        continue
                    url = "http://www.ssfshop.com/{}/{}/good?dspCtgryNo=&brandShopNo=&brndShopId=".format(
                        brand, prod_num)
                    print('\n\n\n\n{}\n\n\n\n'.format(url))
                    # Load the product page; on timeout fall back to the
                    # listing page and keep retrying until one loads.
                    while True:
                        try:
                            self.driver.get(url)
                            break
                        except TimeoutException:
                            print(curr_url)
                            try:
                                self.driver.get(curr_url)
                                break
                            except TimeoutException:
                                continue
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)

                    # scrape page
                    # Brand text ends at the first non-breaking space.
                    brand = self.sel.xpath(
                        '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/h3/a/text()'
                    ).extract_first().strip().split('\xa0')[0]
                    name = self.sel.xpath(
                        '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/h1/text()'
                    ).extract()[-1].strip()
                    price = self.sel.xpath(
                        '/html/body/div[3]/div[1]/section[2]/div[1]/div[2]/div[1]/em/text()'
                    ).extract_first()
                    if '\xa0' in price:
                        price = price.strip('\xa0')
                    prod_desc = self.sel.xpath(
                        '//*[@id="about"]/div/text()').extract()
                    # NOTE(review): extract() returns a list, never None —
                    # the None branch here is dead code.
                    if prod_desc is None:
                        prod_desc = ''
                    else:
                        prod_desc = (' '.join(prod_desc)).strip()
                    # Single colourway per page: keyed by the empty string.
                    color = {}
                    c = ''
                    color[c] = {'img_url': [], 'size': [], 'rel': []}
                    size_list = self.sel.xpath(
                        '//div[@class="option"]/ul/li/a/em/text()').extract()
                    size = []
                    for idx, s in enumerate(size_list):
                        # Entries containing parentheses are presumably
                        # annotations rather than sizes — skipped; verify
                        # against the live option markup.
                        if '(' in s.strip() or ')' in s.strip():
                            continue
                        size.append(s.strip().strip('/').strip())
                    # Image URLs live inside inline <script> blocks as
                    # escaped-quote strings; take the first quoted token
                    # of each script that contains one.
                    img_script_list = self.sel.xpath(
                        '/html/body/div[3]/div[1]/section[2]/div[1]/div[1]/script/text()'
                    ).extract()
                    imgs = []
                    for img_script in img_script_list:
                        temp = img_script.split('\\"')
                        if len(temp) == 1:
                            continue
                        else:
                            imgs.append(temp[1])
                    color[c]['img_url'] = imgs
                    color[c]['size'] = size

                    # write progress
                    __record_progress__(prod_num)
                    # write data
                    __record_data__(url, category, brand, name, price,
                                    prod_num, prod_desc, color)
                    # go back to main page
                    self.driver.get(curr_url)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)

                # click next page
                page_list = self.sel.xpath(
                    '//div[@id="pagingArea"]/a/text()').extract()
                next_page_num = curr_page_num + 1
                if str(next_page_num) not in page_list:
                    next_btns_list = self.sel.xpath(
                        '//div[@id="pagingArea"]/a/@alt').extract()
                    if '다음페이지' in next_btns_list:
                        # click next button (the alt text is Korean for
                        # "next page")
                        prev_page_item = self.sel.xpath(
                            '//*[@id="dspGood"]/li[1]/a/img/@src'
                        ).extract_first()
                        while True:
                            xpath = '//div[@id="pagingArea"]/a[contains(@alt, "{}")]'.format(
                                '다음페이지')
                            try:
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print("\n\nERROR WHILE CILCKING Next Button")
                                continue
                            self.driver.execute_script('arguments[0].click()',
                                                       to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            curr_item = self.sel.xpath(
                                '//div[@id="pagingArea"]/a/text()'
                            ).extract_first()
                            # NOTE(review): prev_page_item is a product image
                            # src while curr_item is pagination-link text, so
                            # this comparison is effectively always unequal
                            # and the loop exits after one click — likely a
                            # copy/paste bug (compare the dspGood img src as
                            # in the else-branch below).
                            if prev_page_item != curr_item:
                                print(
                                    "\n\n\n\n\nNEXT BUTTON CLICKED: page now {}\n\n"
                                    .format(str(curr_page_num + 1)))
                                curr_page_num += 1
                                break
                    else:
                        # final page
                        break

                else:
                    # click next page
                    # First product image on the current page; compared after
                    # the click to confirm the grid actually changed.
                    prev_page_item = self.sel.xpath(
                        '//*[@id="dspGood"]/li[1]/a/img/@src').extract_first()
                    while True:
                        xpath = '//div[@id="pagingArea"]/a[contains(text(), "{}")]'.format(
                            str(next_page_num))
                        try:
                            to_be_clicked = WebDriverWait(
                                self.driver, 120).until(
                                    EC.element_to_be_clickable(
                                        (By.XPATH, xpath)))
                        except TimeoutException:
                            print("\n\nERROR WHILE CILCKING next page")
                            continue
                        self.driver.execute_script('arguments[0].click()',
                                                   to_be_clicked)
                        __delay_time__()
                        self.sel = scrapy.Selector(
                            text=self.driver.page_source)
                        curr_item = self.sel.xpath(
                            '//*[@id="dspGood"]/li[1]/a/img/@src'
                        ).extract_first()
                        if prev_page_item != curr_item:
                            print(
                                "\n\n\n\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                format(str(next_page_num)))
                            curr_page_num += 1
                            break
Пример #25
0
    def parse(self, response):
        """Crawl thehyundai.com: every category, every brand, every page.

        Walks each category link in the left-hand "cmenu" menu, then each
        brand within the category, pages through the brand's product list,
        and for every product writes a progress marker
        (``__record_progress__``) and a data row (``__record_data__``).
        Navigation is done with Selenium via ``self.driver``; after every
        click the rendered DOM is re-parsed into ``self.sel``.

        :param response: scrapy ``Response`` for the site's root page.
        """
        self.driver.get(response.url)

        # Anti-bot-detection: spoof navigator.plugins, navigator.languages
        # and the WebGL vendor/renderer strings that headless browsers leak.
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'plugins', {get: function() {return[1, 2, 3, 4, 5];},});"
        )
        self.driver.execute_script(
            "Object.defineProperty(navigator, 'languages', {get: function() {return ['ko-KR', 'ko']}})"
        )
        self.driver.execute_script(
            "const getParameter = WebGLRenderingContext.getParameter;WebGLRenderingContext.prototype.getParameter = function(parameter) {if (parameter === 37445) {return 'NVIDIA Corporation'} if (parameter === 37446) {return 'NVIDIA GeForce GTX 980 Ti OpenGL Engine';}return getParameter(parameter);};"
        )

        self.sel = scrapy.Selector(text=self.driver.page_source)

        # Category menu: href and label of every second-level menu entry.
        caturls = self.sel.xpath(
            '//ul[@class="cmenu"]/li/ul/li/a/@href').extract()
        catlist = self.sel.xpath(
            '//ul[@class="cmenu"]/li/ul/li/a/text()').extract()
        for catidx, cat in enumerate(catlist):
            category = cat.strip()
            # NOTE(review): base is http:// here but https:// for product
            # URLs further down — presumably both resolve; confirm intended.
            url = urljoin('http://www.thehyundai.com/', caturls[catidx])
            self.driver.get(url)
            __delay_time__()
            self.sel = scrapy.Selector(text=self.driver.page_source)

            print("\n\n{} clicked\n\n".format(category))

            # Brand list of this category; a trailing "(count)" is stripped.
            brand_lists = self.sel.xpath(
                '//ul[@class="brand-list-wrap"]/li/a/text()').extract()
            for brandidx, brand in enumerate(brand_lists):
                brand = brand.strip().split("(")[0].strip()

                # Does a 'load more' toggle exist for the brand list?
                # (BUGFIX: the initializer was misspelled ``elment_exists``;
                # both branches below assign the correct name, so this only
                # removes a dead variable / latent NameError trap.)
                element_exists = False
                try:
                    xpath = '//a[@class="btn-more-tog"]'
                    self.driver.find_element_by_xpath(xpath)
                    element_exists = True
                except NoSuchElementException:
                    element_exists = False

                if element_exists:
                    # The toggle doubles as a collapse button; only treat it
                    # as expandable while its label is Korean for 'more'.
                    load_more_text = self.sel.xpath(
                        '//a[@class="btn-more-tog"]/text()').extract_first()
                    if load_more_text == '더보기':
                        element_exists = True
                    else:
                        element_exists = False

                if element_exists:
                    # Expand the full brand list before clicking a brand.
                    try:
                        actions = chains(self.driver)
                        xpath = '//a[@class="btn-more-tog"]'
                        load_more = self.driver.find_element_by_xpath(xpath)
                        actions.move_to_element(load_more).perform()
                        to_be_clicked = WebDriverWait(self.driver, 120).until(
                            EC.element_to_be_clickable((By.XPATH, xpath)))
                    except TimeoutException:
                        print(
                            "\n\nTimeoutException while clicking {} load more".
                            format(category))
                        continue
                    self.driver.execute_script('arguments[0].click()',
                                               to_be_clicked)
                    __delay_time__()
                    self.sel = scrapy.Selector(text=self.driver.page_source)
                    print('\n\n {} load more clicked\n\n'.format(category))

                # Click the brand link; the <ul> class gains " open" once
                # the list has been expanded, so the XPath differs.
                try:
                    actions = chains(self.driver)
                    if element_exists:
                        xpath = '//ul[@class="brand-list-wrap open"]/li[{}]/a'.format(
                            str(brandidx + 1))
                    else:
                        xpath = '//ul[@class="brand-list-wrap"]/li[{}]/a'.format(
                            str(brandidx + 1))
                    brand_click = self.driver.find_element_by_xpath(xpath)
                    actions.move_to_element(brand_click).perform()
                    to_be_clicked = WebDriverWait(self.driver, 120).until(
                        EC.element_to_be_clickable((By.XPATH, xpath)))
                except TimeoutException:
                    print("\n\nTimeoutException while clicking {} ".format(
                        brand))
                    continue
                self.driver.execute_script('arguments[0].click()',
                                           to_be_clicked)
                __delay_time__()
                self.sel = scrapy.Selector(text=self.driver.page_source)
                print('\n\n {} clicked\n\n'.format(brand))

                curr_page_num = 1
                while True:
                    # Product tiles on the current page.  The four lists are
                    # assumed parallel (same length and order) — the indexed
                    # lookups below would raise IndexError otherwise.
                    prod_urls = self.sel.xpath(
                        '//ul[@class="product-list type1"]/li/div/div[1]/a/@href'
                    ).extract()
                    names = self.sel.xpath(
                        '//ul[@class="product-list type1"]/li/div/div[1]/a/img/@alt'
                    ).extract()
                    imgs = self.sel.xpath(
                        '//ul[@class="product-list type1"]/li/div/div[1]/a/img/@src'
                    ).extract()
                    prices = self.sel.xpath(
                        '//ul[@class="product-list type1"]/li/div/div[2]/div/span[1]/text()'
                    ).extract()
                    for prod_idx, produ in enumerate(prod_urls):
                        prod_url = urljoin('https://www.thehyundai.com/',
                                           produ)
                        name = names[prod_idx]
                        img = imgs[prod_idx]
                        # Description / product number / size are not
                        # available from the listing page; recorded empty.
                        prod_desc = ''
                        prod_num = ''
                        price = prices[prod_idx]
                        color = {}
                        c = ''
                        color[c] = {'img_url': img, 'size': '', 'rel': []}

                        # write progress
                        __record_progress__(prod_url)
                        # write data
                        __record_data__(prod_url, category, brand, name, price,
                                        prod_num, prod_desc, color)

                    # Pagination: the visible page-number links decide
                    # whether we advance via a numbered link or the
                    # "next block" arrow button.
                    page_list = self.sel.xpath(
                        '//*[@id="container"]/div[2]/div/div[2]/ul/li/a/text()'
                    ).extract()
                    next_page_num = curr_page_num + 1
                    if str(next_page_num) not in page_list:
                        next_button_exists = False
                        try:
                            xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[@class="direction next_1"]'
                            self.driver.find_element_by_xpath(xpath)
                            next_button_exists = True
                        except NoSuchElementException:
                            next_button_exists = False
                        if next_button_exists:
                            # Click the next-block arrow; the first product's
                            # title is used to detect that the page actually
                            # changed before advancing the counter.
                            prev_page_item = self.sel.xpath(
                                '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                            ).extract_first()
                            while True:
                                xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[@class="direction next_1"]'
                                try:
                                    # NOTE(review): on repeated timeouts this
                                    # retries forever — consider a retry cap.
                                    to_be_clicked = WebDriverWait(
                                        self.driver, 120).until(
                                            EC.element_to_be_clickable(
                                                (By.XPATH, xpath)))
                                except TimeoutException:
                                    print(
                                        "\n\nERROR WHILE CLICKING Next Button after {}"
                                        .format(str(curr_page_num)))
                                    continue
                                self.driver.execute_script(
                                    'arguments[0].click()', to_be_clicked)
                                __delay_time__()
                                self.sel = scrapy.Selector(
                                    text=self.driver.page_source)
                                curr_item = self.sel.xpath(
                                    '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                                ).extract_first()
                                if prev_page_item != curr_item:
                                    print(
                                        "\n\nNEXT BUTTON CLICKED: page now {}\n\n"
                                        .format(str(curr_page_num + 1)))
                                    curr_page_num += 1
                                    break
                        else:
                            # No numbered link and no arrow: final page.
                            break

                    else:
                        # Click the numbered next-page link; same
                        # first-item comparison confirms the page changed.
                        prev_page_item = self.sel.xpath(
                            '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                        ).extract_first()
                        while True:
                            xpath = '//*[@id="container"]/div[2]/div/div[2]/ul/li/a[contains(text(), "{}")]'.format(
                                str(next_page_num))
                            try:
                                to_be_clicked = WebDriverWait(
                                    self.driver, 120).until(
                                        EC.element_to_be_clickable(
                                            (By.XPATH, xpath)))
                            except TimeoutException:
                                print("\n\nERROR WHILE CLICKING next page")
                                continue
                            self.driver.execute_script('arguments[0].click()',
                                                       to_be_clicked)
                            __delay_time__()
                            self.sel = scrapy.Selector(
                                text=self.driver.page_source)
                            curr_item = self.sel.xpath(
                                '//*[@id="product-list"]/li[1]/div/div[2]/a/text()'
                            ).extract_first()
                            if prev_page_item != curr_item:
                                print("\n\nNEXT PAGE CLICKED: page now {}\n\n".
                                      format(str(next_page_num)))
                                curr_page_num += 1
                                break