def _parse_features(self, response):
    """Collect "title: content" feature pairs from the page's JS data.

    Reads ``['description']['bullets']`` from the mapping returned by
    ``self.parse_js_data(response)``. Every bullet whose ``'value'``
    contains a colon becomes a one-entry dict ``{title: content}``, where
    the content is passed through ``clean_text``. Collection stops at the
    first bullet whose value has no colon.

    Returns:
        The list of one-entry dicts, or ``None`` when any exception is
        raised while reading or parsing (the traceback is logged at
        WARNING level).
    """
    try:
        feature_list = []
        js_data = self.parse_js_data(response)
        for feat in js_data['description']['bullets']:
            value = feat['value']
            if ':' not in value:
                # Stop at the first bullet that is not in "title: content" form.
                break
            # Split on the FIRST colon only, so content that itself contains
            # colons (e.g. "Size: 10:20") is kept whole instead of truncated.
            title, _, content = value.partition(':')
            feature_list.append({title: clean_text(self, content)})
        return feature_list
    except Exception:
        # Best-effort parse: log the failure and report "no features".
        self.log("Error while forming request for base product data: {}".format(traceback.format_exc()), WARNING)
        return None
def _parse_features(self, response):
    """Extract product specifications as a list of one-entry dicts.

    Spec names are the label texts found under
    ``ul#productSpecsContainer``. For each name, the text nodes below the
    element whose ``aria-label`` equals that name are collected and passed
    through ``clean_list``:

    * more than one fragment: every ``span.strong`` title (colons removed)
      is paired, by position, with the text of the non-strong spans;
    * exactly one fragment: ``{name: clean_text(fragment)}``;
    * no fragment: the spec is skipped.

    Returns:
        list of ``{title: value}`` dicts, possibly empty.
    """
    features = []
    spec_names = response.xpath(
        '//ul[@id="productSpecsContainer"]'
        '/li//label[contains(@for, "product_spec")]/text()').extract()

    for f_name in spec_names:
        # Shared prefix of every query for this spec's value element.
        value_xpath = (
            '//ul[@id="productSpecsContainer"]'
            '/li/div[contains(@id, "product_spec")]'
            '/*[@aria-label="%s"]' % f_name)

        f_content = clean_list(
            self, response.xpath(value_xpath + '//text()').extract())
        if not f_content:
            # A label without any content used to raise IndexError on
            # f_content[0]; there is nothing to report for it.
            continue

        if len(f_content) > 1:
            titles = clean_list(self, response.xpath(
                value_xpath + '//span[@class="strong"]/text()').extract())
            texts = clean_list(self, response.xpath(
                value_xpath +
                '//span[not(contains(@class,"strong"))]/text()').extract())
            # Pair by position: list.index() would map a repeated title to
            # the first text every time, and indexing texts directly raised
            # IndexError when there were fewer texts than titles.
            for title, text in zip(titles, texts):
                features.append({title.replace(":", ""): text})
        else:
            features.append({f_name: clean_text(self, f_content[0])})

    return features
def _parse_features(self, response):
    """Build the product spec list from the ``productSpecsContainer`` block.

    Each label text under ``ul#productSpecsContainer`` is treated as a spec
    name, and the element whose ``aria-label`` equals that name supplies the
    value. The result is a list of one-entry dicts:

    * when the value element yields several text fragments (after
      ``clean_list``), its ``span.strong`` titles (colons stripped) are
      matched positionally with the texts of its non-strong spans;
    * when it yields one fragment, the entry is ``{name: clean_text(text)}``;
    * when it yields none, the spec is left out.

    Returns:
        list of ``{title: value}`` dicts, possibly empty.
    """
    def select(base, suffix):
        # Run one sub-query below the value element and clean its results.
        return clean_list(self, response.xpath(base + suffix).extract())

    features = []
    features_name = response.xpath(
        '//ul[@id="productSpecsContainer"]'
        '/li//label[contains(@for, "product_spec")]/text()').extract()

    for f_name in features_name:
        base = ('//ul[@id="productSpecsContainer"]'
                '/li/div[contains(@id, "product_spec")]'
                '/*[@aria-label="%s"]' % f_name)
        fragments = select(base, '//text()')

        if not fragments:
            # Previously fell into the single-value branch and raised
            # IndexError on fragments[0]; skip empty specs instead.
            continue

        if len(fragments) == 1:
            features.append({f_name: clean_text(self, fragments[0])})
            continue

        sub_titles = select(base, '//span[@class="strong"]/text()')
        sub_texts = select(
            base, '//span[not(contains(@class,"strong"))]/text()')
        # zip() pairs by position, so repeated titles get their own text
        # (list.index() always returned the first occurrence) and a shorter
        # text list no longer raises IndexError.
        features.extend(
            {sub_title.replace(":", ""): sub_text}
            for sub_title, sub_text in zip(sub_titles, sub_texts))

    return features
def _parse_retailer_key(self, response):
    """Return the retailer key taken from the ``itemprop="sku"`` span.

    The span's text selection is reduced with ``extract_first`` and the
    result is passed through ``clean_text``.
    """
    sku_selection = response.xpath('//span[@itemprop="sku"]/text()')
    return clean_text(self, extract_first(sku_selection))
def _parse_sku(self, response):
    """Return the SKU taken from the ``itemprop="sku"`` span.

    The span's text selection is reduced with ``extract_first`` and the
    result is passed through ``clean_text``.
    """
    sku_nodes = response.xpath('//span[@itemprop="sku"]/text()')
    raw_sku = extract_first(sku_nodes)
    return clean_text(self, raw_sku)
def _parse_model(self, response):
    """Return the model taken from the ``itemprop="mpn"`` span.

    The span's text selection is reduced with ``extract_first`` and the
    result is passed through ``clean_text``.
    """
    mpn_nodes = response.xpath('//span[@itemprop="mpn"]/text()')
    return clean_text(self, extract_first(mpn_nodes))
def _parse_model(self, response):
    """Return the cleaned model number from the specification list.

    Looks for a ``dt.product-specification-list__label`` whose text contains
    "Model" and takes the first text node of the ``dd`` elements that follow
    it. Returns ``None`` when that lookup yields nothing (or an empty
    string); otherwise the value is passed through ``clean_text``.
    """
    model_xpath = (
        '//dt[@class="product-specification-list__label" and contains(text(),"Model")]/following::dd/text()'
    )
    raw_model = response.xpath(model_xpath).extract_first()
    if not raw_model:
        return None
    return clean_text(self, raw_model)
def _parse_model(self, response):
    """Return the cleaned model number found in the raw page text.

    Searches ``response.text`` for ``Model number:<value></p>`` and passes
    the stripped ``<value>`` through ``clean_text``.

    Returns:
        The cleaned model string, or ``None`` when the pattern is absent.
    """
    match = re.search(r'Model number:([^<]+)</p>', response.text)
    if match is None:
        # Make the no-match path explicit so a missing model never reaches
        # clean_text as None.
        return None
    return clean_text(self, match.group(1).strip())