def _get_product_soup(self):
    # Fetch the vendor search results page and locate the product table.
    start_url = self.vpart_url
    soup = www.get_soup(start_url)
    ptable = soup.find('table', id=re.compile(r'ctl00_ProductList'))
    if ptable is None:
        raise ValueError("No result list found for " + self.vpno)
    rows = ptable.find_all('td', class_=re.compile(r'tableNode'))
    if not rows:
        raise ValueError("No results found for " + self.vpno)
    # Scan the result rows for an exact part-number match.
    product_url = None
    for row in rows:
        product_link = row.find('a', class_='highlight')
        pno = product_link.text.strip()
        if pno == self.vpno.strip():
            product_url = urlparse.urljoin(self.url_base,
                                           product_link.attrs['href'])
            break
    if not product_url:
        raise ValueError("Exact match not found for " + self.vpno)
    # Fetch the matched product page itself.
    soup = www.get_soup(product_url)
    if soup is None:
        logger.error("Unable to open TI product page: " + self.vpno)
        return None
    return soup
def test_www_errors():
    # A request returning an HTTP error status should raise HTTPError.
    with pytest.raises(HTTPError):
        www.get_soup('http://httpstat.us/404')
    # An unknown URL scheme should raise URLError.
    with pytest.raises(URLError):
        www.get_soup('httpd://httpstat.us/404')
    # urlopen is expected to swallow server errors and return None.
    result = www.urlopen('http://httpstat.us/500')
    assert result is None
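# The exceptions exercised by test_www_errors are taken to be the standard
# library's urllib error classes. A py2/py3-portable import for the test
# module (an assumption about its actual import block, not shown here):
import pytest
from six.moves.urllib.error import HTTPError, URLError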
def _process_resultpage_row(self, row, ident):
    d, v, f = parse_ident(ident)
    sanitycheck = self._get_device_searchparams(d)[1]
    # Pull the candidate part name out of the result row and sanity-check
    # it against the requested ident before fetching the product page.
    name_cell = row.find(attrs={'class': 'name'})
    link = name_cell.find('a')
    name = link.contents[0]
    if not sanitycheck(name, d, v, f):
        return None
    # Strip any query string / fragment from the product link.
    o = urlparse(link.attrs['href'])
    uri = o.scheme + "://" + o.netloc + o.path
    part = self._partclass(vpno=None, ident=ident,
                           vendor=self, shell_only=True)
    response = get_soup(uri)
    part.load_from_response(response)
    ns = part.vqtyavail is not None
    unitp = part.prices[0].unit_price
    minqty = part.abs_moq
    raw = part.raw
    return SearchPart(part.vpno, part.mpartno, None, ns,
                      unitp, minqty, raw)
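# _process_resultpage_row packs its result into a 7-field SearchPart
# container. The codebase's own definition is not shown here; a minimal
# namedtuple matching the positional arguments used above (the field names
# are an assumption) would look like:
from collections import namedtuple
SearchPart = namedtuple(
    'SearchPart',
    'pno mfgpno package ns unitp minqty raw'
)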
def _get_search_vpnos(self, device, value, footprint):
    if value.strip() == '':
        return None, 'NOVALUE'
    device, value, footprint = \
        self._search_preprocess(device, value, footprint)
    url = urlparse.urljoin(
        self._url_base,
        "Search.aspx?k={0}&pt=-1".format(urllib.quote_plus(value)))
    soup = www.get_soup(url)
    if soup is None:
        return None, 'URL_FAIL'
    # Collect candidate parts from each result soup, accumulating the
    # strategy string as a record of how they were found.
    parts = []
    strategy = ''
    for soup in self._get_search_soups(soup):
        sr = self._process_search_soup(soup)
        if sr.success is True:
            if sr.parts:
                parts.extend(sr.parts)
            strategy += ', ' + sr.strategy
    strategy = '.' + strategy
    if not len(parts):
        return None, strategy + ':NO_RESULTS:COLLECTED'
    parts = self._prefilter_parts(parts, value)
    if not len(parts):
        return None, strategy + ':NO_RESULTS:PREFILTER'
    sr = self._filter_results(parts, value, footprint)
    if sr.parts:
        # De-duplicate and unescape the part numbers before returning.
        pnos = list(set(sr.parts))
        pnos = [html_parser.unescape(x) for x in pnos]
        return pnos, ':'.join([strategy, sr.strategy])
    return None, strategy + ':NO_RESULTS:POSTFILTER'
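# The helpers above rely on module-level aliases for urlparse.urljoin,
# urllib.quote_plus and html_parser.unescape. The original module's imports
# are not shown; one portable arrangement that satisfies those call sites
# (an assumption, not the project's actual import block) is:
try:                                    # Python 2
    import urlparse
    import urllib
    from HTMLParser import HTMLParser
    html_parser = HTMLParser()
except ImportError:                     # Python 3
    import urllib.parse as urlparse     # urlparse.urljoin
    import urllib.parse as urllib       # urllib.quote_plus
    import html as html_parser          # html_parser.unescape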
def test_cached_fetcher():
    test_url = 'http://www.google.com'
    # The cache key is the md5 hex digest of the (encoded) URL.
    if six.PY3:
        filepath = md5(test_url.encode('utf-8')).hexdigest()
    else:
        filepath = md5(test_url).hexdigest()
    # Make sure the cache entry does not exist before the fetch.
    fs = www.cached_fetcher.cache_fs
    if fs.exists(filepath):
        fs.remove(filepath)
    soup = www.get_soup(test_url)
    assert soup is not None
    # The fetch should have created the cache entry.
    assert fs.exists(filepath)
def _get_search_results(self, sparams, ident):
    # Build the search URL from the vendor's search endpoint and the
    # supplied search parameters.
    params = {'limit': 100, 'route': 'product/search'}
    params.update(sparams)
    params = urlencode(params)
    url = '?'.join([self._searchurl_base, params])
    soup = get_soup(url)
    parts = []
    strategy = ''
    for soup in self._get_search_soups(soup):
        sr = self._process_search_soup(soup, ident)
        if sr.success is True:
            if sr.parts:
                parts.extend(sr.parts)
            strategy += ', ' + sr.strategy
    strategy = '.' + strategy
    if not len(parts):
        return None, strategy + ':NO_RESULTS:COLLECTED'
    return parts, strategy
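# _process_search_soup (here and in _get_search_vpnos above) is expected to
# return an object exposing 'success', 'parts' and 'strategy'. A minimal
# stand-in with that shape (the class actually used by the codebase is an
# assumption) could be:
from collections import namedtuple
SearchResult = namedtuple('SearchResult', 'success parts strategy')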