def getProductDimension(self, instance_of, instance_val):
    """Return a product dimension string found in ``instance_val``.

    Args:
        instance_of: ``"DOCUMENT"`` to resolve ``instance_val`` through
            ``self.getElementValue``, or ``"STRING"`` to use
            ``instance_val`` as the text itself. Any other value yields
            ``''``.
        instance_val: The value to inspect. ``None`` or an empty value
            yields ``''``.

    Returns:
        The ``StringUtil.str_search_str`` result for the first entry of
        ``self.dimensionPatterns`` that ``StringUtil.str_find_str`` reports
        in the cleaned text. When no pattern hits, or the hit is longer
        than 35 characters, the result of ``self.getProductWeight`` on the
        cleaned text is returned instead. A missing result is returned as
        ``''`` rather than ``None``.
    """
    if self.htmlObject is None or instance_val is None:
        return ''
    # Without a recognised source kind or any input there is no text to
    # scan; bail out instead of feeding None into the StringUtil helpers.
    if instance_of not in ("DOCUMENT", "STRING") or len(instance_val) == 0:
        return ''

    if instance_of == "DOCUMENT":
        content = self.getElementValue(instance_val)
    else:
        content = instance_val
    if content is None:
        return ''

    content = StringUtil.remove_html_tags(content)
    # Pattern matches a backslash followed by three [a-z0-9] characters.
    content = StringUtil.str_cleaner(content, r'\\([a-z0-9]{3})', '')
    # Pattern matches runs of characters outside the listed
    # alphanumeric/whitespace/punctuation set.
    content = StringUtil.str_cleaner(content, r'[^0-9a-zA-Z\s\-\(\).,"\'&]+', '')

    dimension = ''
    if content:
        text = str(content)
        for pattern in self.dimensionPatterns:
            if StringUtil.str_find_str(text, pattern):
                dimension = StringUtil.str_search_str(text, pattern)
                break

    # Fall back to the weight lookup when nothing usable was found. The
    # truthiness test covers both '' and None, so len() is only reached
    # for a non-empty value.
    if not dimension or len(dimension) > 35:
        dimension = self.getProductWeight(content)

    # Keep the return type consistent with the early-exit guards.
    return dimension or ''
def getProductWeight(self, instance_val):
    """Search ``instance_val`` for an item/shipping weight phrase.

    The input is passed through ``StringUtil.str_cleaner`` twice, each time
    with an empty replacement: once with a pattern matching ``<...>`` runs
    and once with a pattern matching two consecutive whitespace characters.
    The cleaned text is then handed to ``StringUtil.str_search_str`` with
    the weight pattern.

    Args:
        instance_val: Text to search, or ``None``.

    Returns:
        ``''`` when ``instance_val`` is ``None``; otherwise whatever
        ``StringUtil.str_search_str`` returns for the weight pattern (its
        no-match value is not visible from this method).
    """
    if instance_val is None:
        return ''

    weight_pattern = r"(item weight|Shipping Weight)(:|:\s|\s:)(\d+(\.\d{1,2})?)(\s|\S)(ounce|pound|lb\s|lbs)"
    without_tags = StringUtil.str_cleaner(instance_val, r'<[^>]*>', '')
    compacted = StringUtil.str_cleaner(without_tags, r'\s\s', '')
    return StringUtil.str_search_str(compacted, weight_pattern)