def getDetails(self, url):
    """Fetch a product detail page and extract its fields.

    Args:
        url: Either an absolute ``http://`` / ``https://`` URL or a path
            relative to ``self.urlDomain``. ``self.pathDetails`` is
            prepended to a relative path that does not already contain it.

    Returns:
        dict with the keys ``title``, ``thumbnail``, ``price``, ``stock``,
        ``category``, ``description``, ``upc`` and ``url``. ``url`` is the
        normalised input (the domain is not added to it).

    Raises:
        AttributeError, IndexError, TypeError: when the page lacks the
            expected markup; lookups that find nothing are not guarded
            here (only the description is optional).
    """
    # Decide "absolute" from the scheme prefix. A substring test would
    # misroute a relative path that merely contains "http", and an absolute
    # URL must never receive the pathDetails prefix.
    isAbsolute = url.startswith(('http://', 'https://'))
    if not isAbsolute and self.pathDetails not in url:
        url = self.pathDetails + url

    detail = Client.getUrl(url if isAbsolute else self.urlDomain + url)
    bsDetail = BeautifulSoup(detail.text, 'lxml')
    article = bsDetail.article

    # Image sources are relative ('../../media/...'); drop the parent-dir
    # hops so the remainder can be appended to the domain.
    thumbnailSrc = article.find('div', {
        'class': 'thumbnail'
    }).div.div.img['src'].replace('../', '')

    # The description paragraph is optional; look it up once.
    descriptionTag = article.find('p', {'class': ''})

    return {
        'title': article.find('div', {'class': 'product_main'}).h1.text,
        'thumbnail': self.urlDomain + thumbnailSrc,
        'price': article.find('p', {'class': 'price_color'}).text,
        'stock': article.find('p', {'class': 'availability'}).text.strip(),
        # Third breadcrumb entry is used as the category.
        'category': bsDetail.find('ul', {
            'class': 'breadcrumb'
        }).find_all('li')[2].text.strip(),
        'description': descriptionTag.text.strip() if descriptionTag else '',
        # First cell of the product information table.
        'upc': article.find('table', {
            'class': 'table table-striped'
        }).td.text,
        'url': url,
    }
def process(self):
    """Scrape the listing page at ``self.url`` and follow its "next" link.

    Opens a DB connection, downloads and parses the page, processes the
    side-bar categories (only when ``self.isNext`` is false), processes the
    ``<section>`` through ``self.article`` and, when the pager has a "next"
    entry, runs a new ``Process`` on that page.

    Errors are not propagated: ``HTTPError`` and any other ``Exception`` are
    printed with the corresponding ``self.i18n`` message.
    """
    try:
        DB().get_connection()
        site = Client.getUrl(self.url)
        bsObject = BeautifulSoup(site.text, 'lxml')

        # Categories are only handled when this is not a follow-up page.
        if not self.isNext:
            self.category(
                bsObject.find_all('div', {'class': 'side_categories'}))

        article = self.article(bsObject.find('section'))

        # A listing that fits on one page has no pager at all; that is the
        # normal end of pagination, not an error.
        pager = article.find('ul', {'class': 'pager'})
        if pager is None:
            return

        nextItem = pager.find('li', {'class': 'next'})
        if nextItem is None:
            return

        urlNext = nextItem.a['href']
        print(urlNext)
        # NOTE(review): pathDetails is prepended whenever the href does not
        # already contain it -- confirm this holds for every listing URL.
        prefix = self.pathDetails if self.pathDetails not in urlNext else ''
        Process(self.urlDomain + prefix + urlNext, True).process()
    except HTTPError as eh:
        print(self.i18n.get('error_proceso'), eh)
    except Exception as e:
        print(self.i18n.get('error'), e)