def parseData(self, content, url):
    """Extract phone listings from a thegioididong product page.

    Args:
        content: Parsed page exposing ``find``/``findAll`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        url: Address the page was fetched from; used to resolve relative
            product links and echoed in the progress messages.

    Returns:
        A list of ``PhoneData`` records, one per entry that parsed cleanly.

    Raises:
        NoProductFoundException: If the ``ul`` container with class
            ``homeproduct`` is missing or holds no ``li`` entries.
    """
    listMobile = []
    listProduct = content.find('ul', attrs={'class': 'homeproduct'})
    # A missing container means there is nothing to scrape: report it the
    # same way as an empty list instead of failing on ``None.findAll``.
    if listProduct is None:
        raise NoProductFoundException
    allProducts = [
        item.find('a', href=True) for item in listProduct.findAll('li')
    ]
    if not allProducts:
        raise NoProductFoundException
    for a in allProducts:
        # Best effort per entry: any failure while reading one product is
        # printed and that product skipped, so the rest of the page is kept.
        try:
            image_html = ScrapEngine.hideInvalidTag(a.find('img'), ['strike'])
            name_html = ScrapEngine.hideInvalidTag(a.find('h3'), ['strike'])
            price_html = ScrapEngine.hideInvalidTag(
                a.find('div', attrs={'class': 'price'}), ['strike', 'span'])
            # The image address is read from ``src`` first and from
            # ``data-original`` otherwise; "NA" marks that neither exists.
            image_src = "NA"
            if 'src' in image_html.attrs:
                image_src = image_html['src']
            elif 'data-original' in image_html.attrs:
                image_src = image_html['data-original']
            name = ScrapEngine.processString(name_html.getText(),
                                             self.ignoreTerm)
            price = ScrapEngine.processString(price_html.getText(),
                                              self.ignoreTerm)
            href = urljoin(url, a['href'])
            try:
                listMobile.append(
                    PhoneData(brand=name,
                              model="",
                              price=price,
                              vendor="thegioididong",
                              info={
                                  "url": href,
                                  "img": image_src
                              }))
            except PhoneDataInvalidException as error:
                print("Unable to parse: " + name + ": " + price +
                      ". Error:" + str(error))
        except Exception as e:
            print("Error: " + str(e))
    print("Done with: " + url)
    print("Found {} items".format(str(len(listMobile))))
    return listMobile
def parseData(self, content, url):
    """Extract phone listings from a hoanghaMobile product page.

    Args:
        content: Parsed page exposing ``find``/``findAll`` (presumably a
            BeautifulSoup document -- confirm against the caller).
        url: Address the page was fetched from; used to resolve relative
            product links and echoed in the progress message.

    Returns:
        A list of ``PhoneData`` records, one per entry that parsed cleanly.

    Raises:
        NoProductFoundException: If the ``div`` container with class
            ``product-list`` is missing or holds no ``list-item`` entries.
    """
    listMobile = []
    listProduct = content.find('div', attrs={'class': 'product-list'})
    # A missing container means there is nothing to scrape: report it the
    # same way as an empty list instead of failing on ``None.findAll``.
    if listProduct is None:
        raise NoProductFoundException
    allProducts = listProduct.findAll('div', attrs={'class': 'list-item'})
    if not allProducts:
        raise NoProductFoundException
    for a in allProducts:
        # Best effort per entry: the tag extraction lives inside the ``try``
        # as well, so a failure on one product is printed and that product
        # skipped rather than aborting the whole page.
        try:
            image_html = ScrapEngine.hideInvalidTag(a.find('img'), ['strike'])
            name_html = ScrapEngine.hideInvalidTag(
                a.find('div', attrs={'class': 'product-name'}), ['strike'])
            price_html = ScrapEngine.hideInvalidTag(
                a.find('div', attrs={'class': 'product-price'}), ['strike'])
            image_src = image_html['src']
            name = ScrapEngine.processString(name_html.getText(),
                                             self.ignoreTerm)
            price = ScrapEngine.processString(price_html.getText(),
                                              self.ignoreTerm)
            # The product link is the anchor inside the name element.
            link = name_html.find('a', href=True)
            href = urljoin(url, link['href'])
            try:
                listMobile.append(
                    PhoneData(brand=name,
                              model="",
                              price=price,
                              vendor="hoanghaMobile",
                              info={
                                  "url": href,
                                  "img": image_src
                              }))
            except PhoneDataInvalidException as error:
                print("Unable to parse: " + name + ": " + price +
                      ". Error:" + str(error))
        except Exception as e:
            print("Error: " + str(e))
    print("Done with: " + url)
    return listMobile
def test_processString(self):
    """Check ``ScrapEngine.processString`` against one noisy sample string.

    The sample carries surrounding whitespace and a trailing run of symbols
    from the ignore list; the expected result is the bare text.
    """
    raw_text = " hello world 213 #$%@*^)@ "
    # One ignored character per list element.
    ignored_chars = list("!@#$%^&*)(")
    expected = "hello world 213"
    actual = ScrapEngine.processString(raw_text, ignored_chars)
    self.assertEqual(expected, actual)