Пример #1
0
    def start_requests(self):
        """Crawl every category with PhantomJS and create/update its products.

        For each ``Category`` that has a non-empty ``url``:

        1. Load the category page and read the pagination list
           (``//ul[@class="pagination"]/li``). When it has more than two
           items, the text of the second-to-last item is taken as the
           number of the last page.
        2. Build the URLs to visit: ``<category.url>page-<n>/`` for every
           ``n`` from 1 to the last page, followed by the bare category URL.
        3. Load each URL and, for every product link
           (``//div[@class="good-i-t"]/a``), create a ``Product`` looked up
           by ``(category, slug, name)`` or refresh the ``url`` of the
           existing one.

        Returns ``None``; results are persisted through ``Product.save()``.
        """
        # NOTE(review): if this is a Scrapy spider hook, the framework
        # presumably expects an iterable of requests here -- confirm with
        # the enclosing class; the return value is left unchanged (None).
        for category in Category.objects.all():
            if not category.url:
                continue

            # Phase 1: find out how many pages the category has.
            driver = webdriver.PhantomJS(executable_path=settings.PHANTOMJS)
            try:
                driver.set_window_size(1320, 950)
                driver.get(category.url)
                # Fixed pause after loading, kept from the original code.
                time.sleep(5)
                try:
                    pagination = driver.find_elements_by_xpath(
                        '//ul[@class="pagination"]/li')
                except Exception:
                    # Best effort: treat a failed lookup as "no pagination",
                    # but no longer swallow KeyboardInterrupt/SystemExit.
                    pagination = []
                # Read the text while the browser session is still alive.
                last = pagination[-2].text if len(pagination) > 2 else ""
            finally:
                # Always terminate the PhantomJS process, even on errors.
                driver.quit()

            try:
                last_page = int(last) if last else 0
            except ValueError:
                # The pagination item is not a page number; fall back to
                # crawling only the category URL itself.
                last_page = 0

            parse_urls = [
                category.url + "page-" + str(page) + "/"
                for page in range(1, last_page + 1)
            ]
            parse_urls.append(category.url)

            # Phase 2: visit every page and upsert the products found on it.
            for page_url in parse_urls:
                driver = webdriver.PhantomJS(
                    executable_path=settings.PHANTOMJS)
                try:
                    driver.set_window_size(1320, 950)
                    driver.get(page_url)
                    time.sleep(5)
                    for link in driver.find_elements_by_xpath(
                            '//div[@class="good-i-t"]/a'):
                        name = link.text
                        product_url = link.get_attribute('href')
                        slug = slugify(name)
                        prod = Product.objects.filter(category=category,
                                                      slug=slug,
                                                      name=name).first()
                        print("***" + name)
                        print("***" + product_url)
                        print("***" + slug)

                        if not prod:
                            prod = Product(category=category,
                                           slug=slug,
                                           name=name,
                                           url=product_url)
                        else:
                            prod.url = product_url
                        prod.save()
                finally:
                    # Previously one PhantomJS process leaked per page.
                    driver.quit()