def match(self, meta, search_item, new_item):
    """Return a truthy value when ``new_item`` is accepted for ``search_item``.

    The item is accepted only when the brand check passes, one of the
    rules below holds, and ``self.match_text`` finds none of the
    ``minifigures_words`` entries in the item.

    Rules, tried in order:
      * SKU match together with the soft price check;
      * fuzzy name score >= 80 together with the soft price check;
      * partial fuzzy score >= 90 together with the strict price check;
      * fuzzy name score >= 60 together with the strict price check.

    The soft check calls ``check_price_valid`` with ``min_ratio=0.4`` and
    ``max_ratio=9``; the strict check uses that helper's defaults.  When
    the search item carries no truthy price both checks count as passed.
    If the item price is a tuple, one acceptable entry is enough.

    Every intermediate result is reported through ``logging.error`` and
    ``self.log``.  ``meta`` is not used by this method.
    """
    import logging

    banner = "==================================================="
    logging.error(banner)
    logging.error(search_item)
    logging.error(new_item)
    logging.error(self.match_lego_name(search_item, new_item))

    cleaned_name = filter_category(new_item['name'], search_item['category'])
    logging.error("Filterer name: %s" % cleaned_name)

    brand_matches = brand_match(new_item)
    wanted_name = search_item['name']
    name_matches = name_fuzzy_match(wanted_name, cleaned_name)
    sku_matches = sku_match(search_item, new_item)
    full_score = name_fuzzy_score(wanted_name, cleaned_name)
    partial_score = name_fuzzy_partial_score(wanted_name, cleaned_name)

    search_price = search_item.get('price')
    if not search_price:
        # Nothing to compare against: treat both price checks as passed.
        price_matches = True
        price_matches_soft = True
    else:
        self.log("[[TESTING]] Search price: %s" % str(search_price))
        item_price = new_item['price']
        self.log("[[TESTING]] Item price: %s" % str(item_price))
        if isinstance(item_price, tuple):
            self.log("[[TESTING]] Item price is tuple")
            strict_results = [
                check_price_valid(search_price, candidate)
                for candidate in item_price
            ]
            price_matches = any(strict_results)
            soft_results = [
                check_price_valid(search_price, candidate,
                                  min_ratio=0.4, max_ratio=9)
                for candidate in item_price
            ]
            price_matches_soft = any(soft_results)
        else:
            price_matches = check_price_valid(search_price, item_price)
            price_matches_soft = check_price_valid(
                search_price, item_price, min_ratio=0.4, max_ratio=9)

    # Same rule order as the acceptance list in the docstring.
    product_matches = bool(
        (sku_matches and price_matches_soft)
        or (full_score >= 80 and price_matches_soft)
        or (partial_score >= 90 and price_matches)
        or (full_score >= 60 and price_matches)
    )

    diagnostics = (
        ("Brand matches: %s", brand_matches),
        ("Matches: %s", name_matches),
        ("SKU Matches: %s", sku_matches),
        ("Match score: %s", full_score),
        ("Match partial score: %s", partial_score),
        ("Match price: %s", price_matches),
        ("Match price soft: %s", price_matches_soft),
        ("Product matches: %s", product_matches),
    )
    for template, value in diagnostics:
        logging.error(template % value)
    logging.error(banner)

    contains_excluded_words = any(
        [self.match_text(word, new_item) for word in minifigures_words])

    if not brand_matches:
        return brand_matches
    if not product_matches:
        return product_matches
    return not contains_excluded_words
def match(self, meta, search_item, new_item):
    """Return a truthy value when ``new_item`` is accepted for ``search_item``.

    The item is accepted only when the brand check passes, one of the
    rules below holds, and ``self.match_text`` finds none of the excluded
    phrases ('mini figures from', 'mini figures only', 'from set',
    'from sets') in the item.

    Rules, tried in order:
      * SKU match together with the soft price check;
      * fuzzy name score >= 80 together with the soft price check;
      * partial fuzzy score >= 90 together with the strict price check;
      * fuzzy name score >= 60 together with the strict price check.

    The soft check calls ``check_price_valid`` with ``min_ratio=0.4`` and
    ``max_ratio=9``; the strict check uses that helper's defaults.  When
    the search item carries no truthy price both checks count as passed.

    ``new_item['price']`` may be a single price or a tuple of candidate
    prices; for a tuple one acceptable entry is enough.  The tuple case
    mirrors the sibling ``match`` implementations, which never hand a
    tuple to ``check_price_valid`` directly.

    Every intermediate result is reported through ``logging.error``.
    ``meta`` is not used by this method.
    """
    import logging

    logging.error("===================================================")
    logging.error(search_item)
    logging.error(new_item)
    logging.error(self.match_lego_name(search_item, new_item))

    name = filter_category(new_item['name'], search_item['category'])
    logging.error("Filterer name: %s" % name)

    brand_matches = brand_match(new_item)
    name_matches = name_fuzzy_match(search_item['name'], name)
    sku_matches = sku_match(search_item, new_item)
    score = name_fuzzy_score(search_item['name'], name)
    partial_score = name_fuzzy_partial_score(search_item['name'], name)

    search_price = search_item.get('price')
    if search_price:
        item_price = new_item['price']
        if isinstance(item_price, tuple):
            # Several candidate prices: accept if any single one is valid.
            price_matches = any(
                check_price_valid(search_price, candidate)
                for candidate in item_price)
            price_matches_soft = any(
                check_price_valid(search_price, candidate,
                                  min_ratio=0.4, max_ratio=9)
                for candidate in item_price)
        else:
            price_matches = check_price_valid(search_price, item_price)
            price_matches_soft = check_price_valid(
                search_price, item_price, min_ratio=0.4, max_ratio=9)
    else:
        # Nothing to compare against: treat both price checks as passed.
        price_matches = True
        price_matches_soft = True

    # Strong evidence (SKU, score >= 80) only needs the soft price check;
    # weaker name evidence needs the strict one.
    product_matches = bool(
        (sku_matches and price_matches_soft)
        or (score >= 80 and price_matches_soft)
        or (partial_score >= 90 and price_matches)
        or (score >= 60 and price_matches)
    )

    # NOTE(review): diagnostics are emitted at ERROR level; left unchanged
    # because log consumers may rely on it -- confirm before lowering.
    logging.error("Brand matches: %s" % brand_matches)
    logging.error("Matches: %s" % name_matches)
    logging.error("SKU Matches: %s" % sku_matches)
    logging.error("Match score: %s" % score)
    logging.error("Match partial score: %s" % partial_score)
    logging.error("Match price: %s" % price_matches)
    logging.error("Match price soft: %s" % price_matches_soft)
    logging.error("Product matches: %s" % product_matches)
    logging.error("===================================================")

    excluded_phrases = (
        'mini figures from',
        'mini figures only',
        'from set',
        'from sets',
    )
    # The generator keeps the original short-circuiting: phrases are only
    # checked when brand and product matched, and checking stops at the
    # first phrase found.
    return brand_matches \
        and product_matches \
        and not any(self.match_text(phrase, new_item)
                    for phrase in excluded_phrases)
def match(self, meta, search_item, new_item):
    """Return a truthy value when ``new_item`` is accepted for ``search_item``.

    The item is accepted only when all of the following hold:
      * the brand check passes: 'LEGO' appears in the upper-cased brand,
        or the brand appears in the upper-cased category (non-word
        characters replaced by spaces), or 'LEGO' appears in the
        upper-cased item name;
      * one of the product rules below holds;
      * ``self.match_text`` finds none of the ``minifigures_words``
        entries in the item;
      * the parent class ``match`` also accepts the item.

    Product rules, tried in order:
      * SKU match together with the soft price check;
      * fuzzy name score >= 80 together with the soft price check;
      * partial fuzzy score >= 90 together with the strict price check;
      * fuzzy name score >= 60 together with the strict price check.

    The soft check calls ``check_price_valid`` with ``min_ratio=0.4`` and
    ``max_ratio=9``; the strict check uses that helper's defaults.  When
    the search item carries no truthy price both checks count as passed.

    ``new_item['price']`` may be a single price or a tuple of candidate
    prices; for a tuple one acceptable entry is enough.  The tuple case
    mirrors the sibling ``match`` implementations, which never hand a
    tuple to ``check_price_valid`` directly.

    Every intermediate result is reported through ``logging.error``.
    """
    import logging

    logging.error("===================================================")
    logging.error(search_item)
    logging.error(new_item)
    logging.error(self.match_lego_name(search_item, new_item))

    raw_brand = new_item.get('brand')
    # The lower-case placeholder cannot match the upper-cased category,
    # so a missing brand never passes through the category test.
    brand = raw_brand.upper() if raw_brand else 'no brand'

    name = filter_category(new_item['name'], search_item['category'])
    logging.error("Filterer name: %s" % name)

    # "'LEGO' in brand" already covers the exact and "LEGO ..." prefix
    # cases, so no separate equality / startswith tests are needed.
    brand_matches = 'LEGO' in brand \
        or brand in re.sub(r'[^\w]', ' ', search_item['category'].upper()) \
        or 'LEGO' in new_item['name'].upper()

    name_matches = name_fuzzy_match(search_item['name'], name)
    sku_matches = sku_match(search_item, new_item)
    score = name_fuzzy_score(search_item['name'], name)
    partial_score = name_fuzzy_partial_score(search_item['name'], name)

    search_price = search_item.get('price')
    if search_price:
        item_price = new_item['price']
        if isinstance(item_price, tuple):
            # Several candidate prices: accept if any single one is valid.
            price_matches = any(
                check_price_valid(search_price, candidate)
                for candidate in item_price)
            price_matches_soft = any(
                check_price_valid(search_price, candidate,
                                  min_ratio=0.4, max_ratio=9)
                for candidate in item_price)
        else:
            price_matches = check_price_valid(search_price, item_price)
            price_matches_soft = check_price_valid(
                search_price, item_price, min_ratio=0.4, max_ratio=9)
    else:
        # Nothing to compare against: treat both price checks as passed.
        price_matches = True
        price_matches_soft = True

    # Strong evidence (SKU, score >= 80) only needs the soft price check;
    # weaker name evidence needs the strict one.
    product_matches = bool(
        (sku_matches and price_matches_soft)
        or (score >= 80 and price_matches_soft)
        or (partial_score >= 90 and price_matches)
        or (score >= 60 and price_matches)
    )

    # NOTE(review): diagnostics are emitted at ERROR level; left unchanged
    # because log consumers may rely on it -- confirm before lowering.
    logging.error("Brand matches: %s" % brand_matches)
    logging.error("Matches: %s" % name_matches)
    logging.error("SKU Matches: %s" % sku_matches)
    logging.error("Match score: %s" % score)
    logging.error("Match partial score: %s" % partial_score)
    logging.error("Match price: %s" % price_matches)
    logging.error("Match price soft: %s" % price_matches_soft)
    logging.error("Product matches: %s" % product_matches)
    logging.error("===================================================")

    contains_excluded_words = any(
        self.match_text(word, new_item) for word in minifigures_words)

    # The parent check runs last and only when every local check passed.
    return brand_matches \
        and product_matches \
        and not contains_excluded_words \
        and super(LegoAmazonSpider, self).match(meta, search_item, new_item)
def match(self, meta, search_item, new_item):
    """Return a truthy value when ``new_item`` is accepted for ``search_item``.

    The item is rejected immediately (``False``) when
    ``self.match_lego_name`` does not accept it.  Otherwise it is accepted
    only when the brand check passes, one of the product rules below
    holds, ``self.match_text`` finds none of the ``minifigures_words``
    entries in the item, and the parent class ``match`` accepts it too.

    Product rules, tried in order:
      * SKU match together with the soft price check;
      * fuzzy name score >= 80 together with the soft price check;
      * partial fuzzy score >= 90 together with the strict price check;
      * fuzzy name score >= 60 together with the strict price check.

    The soft check calls ``check_price_valid`` with ``min_ratio=0.4`` and
    ``max_ratio=9``; the strict check uses that helper's defaults.  When
    the search item carries no truthy price both checks count as passed.
    If the item price is a tuple, one acceptable entry is enough.
    """
    # to mimic behaviour of old spider
    if not self.match_lego_name(search_item, new_item):
        return False

    cleaned_name = filter_category(new_item['name'], search_item['category'])

    brand_ok = brand_match(new_item)
    wanted_name = search_item['name']
    # The result is not used in the decision; the call itself is kept.
    name_fuzzy_match(wanted_name, cleaned_name)
    sku_ok = sku_match(search_item, new_item)
    full_score = name_fuzzy_score(wanted_name, cleaned_name)
    partial = name_fuzzy_partial_score(wanted_name, cleaned_name)

    search_price = search_item.get('price')
    if not search_price:
        # Nothing to compare against: treat both price checks as passed.
        strict_price_ok = True
        soft_price_ok = True
    else:
        item_price = new_item['price']
        if isinstance(item_price, tuple):
            strict_results = [
                check_price_valid(search_price, candidate)
                for candidate in item_price
            ]
            strict_price_ok = any(strict_results)
            soft_results = [
                check_price_valid(search_price, candidate,
                                  min_ratio=0.4, max_ratio=9)
                for candidate in item_price
            ]
            soft_price_ok = any(soft_results)
        else:
            strict_price_ok = check_price_valid(search_price, item_price)
            soft_price_ok = check_price_valid(
                search_price, item_price, min_ratio=0.4, max_ratio=9)

    # Same rule order as the product rules in the docstring.
    product_ok = bool(
        (sku_ok and soft_price_ok)
        or (full_score >= 80 and soft_price_ok)
        or (partial >= 90 and strict_price_ok)
        or (full_score >= 60 and strict_price_ok)
    )

    has_excluded_words = any(
        [self.match_text(word, new_item) for word in minifigures_words])

    if not brand_ok:
        return brand_ok
    if not product_ok:
        return product_ok
    if has_excluded_words:
        return False
    # The parent check runs last and only when every local check passed.
    return super(BaseLegoAmazonUSASpider, self).match(
        meta, search_item, new_item)