Пример #1
0
    def handle(self, *args, **options):
        """Rebuild categories and products from the first sheet of price.xlsx.

        All existing ``Category`` and ``Product`` rows are deleted first. The
        sheet is then scanned top to bottom: a row whose column 2 is empty
        starts a new category named after column 3; any other row becomes a
        product (named after column 3) attached to the most recent category.
        Product rows that appear before the first category row are not saved.
        """
        print('Clearing DB')
        Category.objects.all().delete()
        Product.objects.all().delete()

        print('Start importing from excel %s' % DATA_DIR)

        wb = load_workbook(DATA_DIR + '/price.xlsx')
        # Index the workbook directly: get_sheet_by_name()/get_sheet_names()
        # are deprecated in openpyxl.
        sheet = wb[wb.sheetnames[0]]
        cat = None
        # Rows are 1-based and max_row is the index of the last row holding
        # data, so the range must end at max_row + 1 to include that row.
        for row in range(1, sheet.max_row + 1):
            item = sheet.cell(row=row, column=3).value
            row_id = sheet.cell(row=row, column=2).value
            if row_id is None:
                print('Create a new category')
                cat = Category()
                cat.name = item
                cat.save()
            else:
                print('Create a new good')
                # Products can only be stored once a category has been seen.
                if cat:
                    p = Product()
                    p.name = item
                    p.category = cat
                    p.save()
Пример #2
0
    def handle(self, *args, **options):
        """Re-import categories and subcategories from the catalogue menu.

        Deletes every ``Category`` and ``SubCategory`` row and removes the
        ``media`` directory under ``BASE_DIR`` (a missing directory is
        ignored). The catalogue page is then downloaded; each
        ``li.submenu_item`` inside ``ul.submenu`` becomes a ``Category`` named
        after its first link, and each ``a.sub2menu_link`` inside that item
        becomes a ``SubCategory`` of it.
        """
        print('Clearing DB ...')
        # Drop stored rows and uploaded images.
        for model in (Category, SubCategory):
            model.objects.all().delete()
        try:
            shutil.rmtree('%s/media' % BASE_DIR)
        except FileNotFoundError:
            pass

        # Fetch and parse the catalogue landing page.
        base_url = 'https://west-info.biz/katalog-predpriyatij/'
        print(f'Start import from {base_url}')
        page = requests.get(base_url)
        menu = bs(page.text, 'html.parser').find('ul', {'class': 'submenu'})

        # One menu entry per category, with its nested links as subcategories.
        for entry in menu.findAll('li', {'class': 'submenu_item'}):
            category = Category()
            category.name = entry.find('a').text
            category.save()
            print(f'Import {category.name}')
            for anchor in entry.findAll('a', {'class': 'sub2menu_link'}):
                subcategory = SubCategory()
                subcategory.name = anchor.text
                subcategory.category = category
                subcategory.save()
                print(f'Import {subcategory.name}')
Пример #3
0
    def handle(self, *args, **options):
        """Re-import categories (and their products) from the site header menu.

        Deletes every ``Category`` and ``Product`` row and removes
        ``media/product`` under ``BASE_DIR`` (a missing directory is ignored).
        The home page is then downloaded and every ``ul.header__mid-menu``
        inside ``div#header-custom-middle-block`` is walked twice: first over
        its ``span.hlink`` entries, then over its ``<a>`` entries. Each entry
        becomes a ``Category`` and is handed, together with its link, to
        ``get_products`` (defined outside this method).
        """
        print('Clearing DB')
        # Drop stored rows and uploaded images.
        for model in (Category, Product):
            model.objects.all().delete()
        try:
            shutil.rmtree('%s/media/product' % BASE_DIR)
        except FileNotFoundError:
            pass

        # Fetch and parse the home page.
        site_url = 'https://tainabox.com.ua'
        print('Start importing from %s' % site_url)
        page = requests.get(site_url)
        soup = BeautifulSoup(page.text, 'html.parser')

        # Locate the header block that holds the category menus.
        header = soup.find('div', {'id': 'header-custom-middle-block'})
        for menu in header.findAll('ul', {'class': 'header__mid-menu'}):
            # <span class="hlink"> entries carry their target base64-encoded
            # in the data-href attribute.
            for span in menu.findAll('span', {'class': 'hlink'}):
                # Random 1-5 second pause before handling each entry.
                time.sleep(random.randint(1, 5))
                span_category = Category()
                span_category.name = span.text
                span_category.save()
                encoded_href = span.attrs['data-href']
                get_products(
                    base64.b64decode(encoded_href).decode("utf-8"),
                    span_category,
                )
            # Plain anchors expose the link directly in href.
            for anchor in menu.findAll('a'):
                print(anchor.text)
                anchor_category = Category()
                anchor_category.name = anchor.text
                anchor_category.save()
                get_products(anchor['href'], anchor_category)
Пример #4
0
    def handle(self, *args, **options):
        """Re-import categories, subcategories and products from the home page.

        Deletes every ``Category``, ``SubCategory`` and ``Product`` row and
        removes the ``media`` directory under ``BASE_DIR``. The home page is
        then downloaded (TLS verification disabled) and every ``<img>`` inside
        ``div.body_20`` becomes a ``Category``: its ``alt`` text is the name
        and the picture is downloaded and stored in ``Category.image``. The
        links in the first ``<div>`` of the image's enclosing ``<tr>`` become
        subcategories, each handed to ``get_products`` (defined outside this
        method).
        """
        print('Clearing DB')
        # Drop stored rows and uploaded images.
        Category.objects.all().delete()
        SubCategory.objects.all().delete()
        Product.objects.all().delete()
        try:
            shutil.rmtree('%s/media' % BASE_DIR)
        except FileNotFoundError:
            # Nothing to clean up, e.g. on the very first import.
            pass

        # Fetch and parse the home page.
        URL = 'https://gastronoma.net'
        print('Start importing from %s' % URL)
        rez = requests.get(URL, verify=False)
        soup = BeautifulSoup(rez.text, 'html.parser')

        # The temp file is written and read through the same absolute path, so
        # the import does not depend on the current working directory.
        tmp_path = '%s/tmp.png' % BASE_DIR

        # Locate the content block and the category images inside it.
        content = soup.find('div', {'class': 'body_20'})
        for img in content.findAll('img'):
            c = Category()
            c.name = img.get('alt')
            img_url = 'https://gastronoma.net/%s' % img.get('src')
            img_response = requests.get(img_url, stream=True, verify=False)
            # Spool the download into a temporary file ...
            with open(tmp_path, 'wb') as out_file:
                shutil.copyfileobj(img_response.raw, out_file)
            # ... then feed that file to the model's image field.
            with open(tmp_path, 'rb') as img_file:
                c.image.save('cat.png', File(img_file), save=True)
            c.save()
            # Collect the subcategories listed next to the image.
            for subcat in img.find_parent('tr').find('div').findAll('a'):
                sc = SubCategory()
                sc.category = c
                sc.name = subcat.text
                sc.save()
                get_products(c, sc, subcat.get('href'))
Пример #5
0
    def handle(self, *args, **options):
        """Re-import categories, subcategories and products from the home page.

        Deletes every ``Category``, ``SubCategory`` and ``Product`` row and
        removes ``media/category`` under ``BASE_DIR`` on a best-effort basis.
        The home page is then downloaded (TLS verification disabled, with the
        matching urllib3 warning silenced) and every ``<img>`` inside
        ``div.body_20`` becomes a ``Category``: its ``alt`` text is the name
        and the picture is downloaded and stored in ``Category.image``. The
        links in the first ``<div>`` of the image's enclosing ``<tr>`` become
        subcategories, each handed to ``get_products`` (defined outside this
        method).
        """
        print('Clearing DB')
        # Drop stored rows and uploaded images.
        Category.objects.all().delete()
        SubCategory.objects.all().delete()
        Product.objects.all().delete()
        media_dir = '%s/media/category' % BASE_DIR
        try:
            shutil.rmtree(media_dir)  # clear previously imported pictures
        except FileNotFoundError:
            # Nothing to clean up, e.g. on the very first import.
            pass
        except OSError as exc:
            # Cleanup stays best-effort, but the failure is no longer hidden.
            print('Could not remove %s: %s' % (media_dir, exc))

        # Fetch and parse the home page.
        URL = 'https://gastronoma.net'
        print('Start importing from %s' % URL)
        # Silence the warning about unverified HTTPS requests (verify=False).
        # One call is enough; it does not need repeating per image.
        requests.packages.urllib3.disable_warnings(
            category=InsecureRequestWarning)
        rez = requests.get(URL, verify=False)
        soup = BeautifulSoup(rez.text, 'html.parser')

        # The temp file is written and read through the same absolute path, so
        # the import does not depend on the current working directory.
        tmp_path = '%s/tmp.png' % BASE_DIR

        # Locate the content block and the category images inside it.
        content = soup.find('div', {'class': 'body_20'})
        for img in content.findAll('img'):
            c = Category()
            c.name = img.get('alt')
            img_url = 'https://gastronoma.net/%s' % img.get('src')
            img_response = requests.get(img_url, stream=True, verify=False)
            # Spool the download into a temporary binary file ...
            with open(tmp_path, 'wb') as out_file:
                shutil.copyfileobj(img_response.raw, out_file)
            # ... then feed that file to the model's image field.
            with open(tmp_path, 'rb') as img_file:
                c.image.save('cat.png', File(img_file), save=True)
            c.save()
            # Collect the subcategories listed next to the image.
            for subcat in img.find_parent('tr').find('div').findAll('a'):
                sc = SubCategory()
                sc.name = subcat.text
                sc.category = c
                sc.save()
                get_products(c, sc, subcat.get('href'))
            print('Saving... %s' % c.name)
Пример #6
0
    def handle(self, *args, **options):
        """Rebuild categories and products from the first sheet of price.xlsx.

        All existing ``Category`` and ``Product`` rows are deleted first. The
        sheet is then scanned top to bottom: a row whose column 5 is empty
        starts a new category named after column 1; any other row becomes a
        product (named after column 5) attached to the most recent category.
        Product rows that appear before the first category row are skipped.
        """
        # Function-scope import keeps this method self-contained.
        import os

        print('Clear DB')
        Category.objects.all().delete()
        Product.objects.all().delete()

        print('Start import from excel %s' % DATA_DIR)
        # os.path.join picks the right separator; the previous hard-coded
        # backslash only produced a valid path on Windows.
        wb = load_workbook(os.path.join(DATA_DIR, 'price.xlsx'))
        # Index the workbook directly: get_sheet_by_name()/get_sheet_names()
        # are deprecated in openpyxl.
        worksheet = wb[wb.sheetnames[0]]
        category = None
        for row in range(1, worksheet.max_row + 1):
            item = worksheet.cell(row=row, column=5).value
            cat = worksheet.cell(row=row, column=1).value
            if item is None:
                category = Category()
                category.name = cat
                category.save()
                print('Create category')
            elif category:
                product = Product()
                product.name = item
                product.category = category
                product.save()
                print('Create item')
Пример #7
0
    def handle(self, *args, **options):
        """Re-import categories, subcategories and companies from the catalogue.

        Deletes every ``Category``, ``SubCategory`` and ``Company`` row and
        removes the ``media`` directory under ``BASE_DIR`` (a missing
        directory is ignored). The catalogue page is then downloaded and the
        first five ``li.submenu_item`` entries become categories. Each
        ``a.sub2menu_link`` inside an entry becomes a subcategory, its page is
        downloaded, and every ``div.teaser-item`` on that page is stored as a
        ``Company`` linked to the category and subcategory.
        """
        # The original body referred to the parser both as ``BeautifulSoup``
        # and as ``bs``; importing it here makes one name reliably available.
        from bs4 import BeautifulSoup

        print('Clearing DB ...')
        # Drop stored rows and uploaded images.
        Category.objects.all().delete()
        SubCategory.objects.all().delete()
        Company.objects.all().delete()
        try:
            shutil.rmtree('%s/media' % BASE_DIR)
        except FileNotFoundError:
            pass

        # Fetch and parse the catalogue landing page.
        base_url = 'https://west-info.biz/katalog-predpriyatij/'
        print(f'Start import from {base_url}')
        res = requests.get(base_url)
        # Fixed: the response is bound to ``res`` (was the undefined ``rez``).
        soup = BeautifulSoup(res.text, 'html.parser')

        # Locate the menu entries; only the first five are imported.
        categories = soup.findAll('li', {'class': 'submenu_item'})
        for it in categories[:5]:
            c = Category()
            c.name = it.find('a').text
            c.save()
            print(f'Import {c.name}')
            subcategories = it.findAll('a', {'class': 'sub2menu_link'})
            for kat in subcategories:
                sub = SubCategory()
                sub.name = kat.text
                sub.category = c
                sub.save()
                print(f'Import {sub.name}')
                # Fixed: the loop variable is ``kat`` (was the undefined ``k``).
                new_url = f"https://west-info.biz/katalog-predpriyatij{kat['href']}"
                catalog = requests.get(new_url)
                new_soup = BeautifulSoup(catalog.text, 'html.parser')
                div = new_soup.findAll('div', {'class': 'teaser-item'})
                for item in div:
                    firm_name = item.find('h2', {'class': 'pos-title'})
                    firm_description = item.find('p')
                    firm_city = item.find('div', {'class': 'element element-text'})
                    firm_adress = item.findAll('div', {'class': 'element element-text'})
                    firm_phones = item.find('div', {'class': 'element element-text last'})
                    for link in item.findAll('img'):
                        print(link['src'])

                    # Reset for every company so a teaser without a phone
                    # block neither raises NameError nor inherits the numbers
                    # of the previous company.
                    phones_list = []
                    if firm_phones:
                        # Drop spaces, then split on commas and semicolons.
                        phones_list = firm_phones.text.replace(' ', '').replace(',', ' ').replace(';', ' ').split()
                    com = Company()
                    if firm_name:
                        com.name = firm_name.text
                    if firm_description:
                        com.description = firm_description.text
                    if firm_city:
                        com.city = firm_city.text
                    # The second "element-text" block is used as the address.
                    if firm_adress and len(firm_adress) >= 2:
                        com.adress = firm_adress[1].text
                    else:
                        com.adress = '-'

                    # Fixed: assign the loop variable (was the undefined
                    # ``p``). Each pass overwrites the field, so only the last
                    # listed number is kept.
                    for z in phones_list:
                        com.phone = z
                    com.category = c
                    com.sub_category = sub
                    com.save()
                    print(f'{com.name} save...')