def test_product_product_strengths_incremental_new_user_5_to_2_to_5star(self):
    """ Checks that product x product strengths updated incrementally are the same
        as those computed from scratch after a user rates one product twice.

        Two activities on the same product are saved one minute apart: the first
        uses an activity type taken from rating bucket 3, the second one taken
        from rating bucket 5. Incremental and from-scratch results are compared
        once, after both activities have been saved.

        NOTE(review): the test name mentions a 2-star step, but the first activity
        is picked from `activities_by_rating[3]` -- confirm which one is intended.
    """
    user = "******"
    product = "p_mus_1"

    # (rating bucket, timestamp) of each activity, in the order they are saved
    steps = (
        (3, "1988-11-06 9:00:00"),
        (5, "1988-11-06 9:01:00"),
    )

    for rating, timestamp in steps:
        activity = {
            "external_user_id": user,
            "external_product_id": product,
            "activity": self.session_context.activities_by_rating[rating][0],
            "created_at": pytz.utc.localize(dateutil.parser.parse(timestamp)),
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

    self.compare_incremental_vs_from_scratch()
def test_product_product_strengths_incremental_random(self):
    """ Checks that product x product strengths updated incrementally are the same
        as those computed from scratch, using randomly generated activities.

        Runs only when `tests.INCLUDE_RANDOM_TESTS` is set. One hundred activities
        are saved, each with a random user, product and supported activity type,
        two seconds apart; the incremental result is compared after each of them.
    """
    if not tests.INCLUDE_RANDOM_TESTS:
        return

    candidate_users = list(self.db_proxy.fetch_all_user_ids())
    candidate_products = list(self.db_proxy.fetch_all_product_ids())
    first_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

    for step in range(100):
        # The order of the random draws (user, product, activity type) is kept
        # so that a seeded run produces the same sequence of activities.
        chosen_user = random.choice(candidate_users)
        chosen_product = random.choice(candidate_products)
        chosen_type = random.choice(self.session_context.supported_activities)

        activity = {
            "external_user_id": chosen_user,
            "external_product_id": chosen_product,
            "activity": chosen_type,
            "created_at": first_date + dt.timedelta(seconds=2 * step),
        }
        pt.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        self.compare_incremental_vs_from_scratch()
def main(argv):
    """ Command-line entry point that registers a single activity in the summaries.

        :param argv: positional arguments, in this order: environment,
            external_user_id, external_product_id, activity type and activity
            date (parsed with `dateutil.parser.parse`).
        :returns: `{"success": True}` when the summaries were updated; otherwise a
            dict with `"success": False` and a `"message"` holding either the
            usage text or the formatted traceback.
    """
    if len(argv) < 5:
        msg = ("You must specify the environment, the external_user_id, "
               "the external_product_id, the activity type and the activity date")
        log.error(msg)
        return {"success": False, "message": msg}

    try:
        # command-line arguments (anything past the fifth one is ignored)
        env, user, product, activity_type, raw_date = argv[:5]
        activity = {
            "external_user_id": user,
            "external_product_id": product,
            "activity": activity_type,
            "created_at": dateutil.parser.parse(raw_date),
        }
        maintenance.update_summaries(init_session(env), activity)
    except Exception:
        # Top-level boundary: the failure is logged and reported to the caller
        log.exception('Exception on {0}:'.format(__name__))
        return {"success": False, "message": traceback.format_exc()}

    return {"success": True}
def test_user_user_strengths_incremental_with_new_impressions_identified_users(self):
    """ Checks that user x user strengths updated incrementally are the same as
        those computed from scratch when an identified (non-anonymous) user gets
        new impressions followed by a new activity.
    """
    base_date = dateutil.parser.parse("1988-11-06 6:00:00")

    # (user, 1st product seen, 2nd product seen, product consumed, activity type)
    scenarios = [("u_esp_4", "p_nonsense_1", "p_empty_with_missing_category", "p_filter_2", "buy")]

    for idx, (user, seen_1, seen_2, consumed, activity_type) in enumerate(scenarios):
        # Saves a couple of impressions for the chosen user
        impressions_date = pytz.utc.localize(base_date + dt.timedelta(seconds=(2 * idx + 1)))
        for seen in (seen_1, seen_2):
            self.db_proxy.increment_impression_summary(user_id=user, product_id=seen,
                                                       date=impressions_date, anonymous=False)

        # it is important to regenerate from scratch (with all new impressions)
        ut.generate_templates(self.session_context)

        # Saves one activity for that same user, one second after the impressions
        activity = {
            "external_user_id": user,
            "external_product_id": consumed,
            "activity": activity_type,
            "created_at": pytz.utc.localize(base_date + dt.timedelta(seconds=(2 * idx + 2))),
        }
        ut.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        if self.session_context.impressions_enabled:
            targets = [user]
        else:
            targets = None
        self.compare_incremental_vs_from_scratch(target_users=targets)
def test_user_user_strengths_incremental_with_new_impressions_two_new_products(self):
    """ Checks that user x user strengths updated incrementally are the same as
        those computed from scratch when two brand-new, identical products show up.

        Only one user gets an impression, and only on the first twin. Another user
        then consumes both twins, one at a time, and the incremental result is
        compared against the from-scratch one after each activity.
    """
    twin_ids = ("p_tec_TWIN_1", "p_tec_TWIN_2")

    # Saves two new, identical products. Initially, no users will have impressions on them.
    product_date = self.session_context.get_present_date() - dt.timedelta(days=2)
    for twin_id in twin_ids:
        self.db_proxy.insert_product({
            "external_id": twin_id,
            "language": "english",
            "date": product_date,
            "resources": {"title": "Whatever Gets You Through The Night"},
            "full_content": "Begin. Technology. Technology. This is all we got. End.",
            "category": "Nonsense",
        })

    viewer = "u_eco_1"
    consumer = "u_eco_2"
    five_star_activity = self.session_context.activities_by_rating[5][0]

    # Saves an impression on just one of the new products
    self.db_proxy.increment_impression_summary(
        user_id=viewer,
        product_id=twin_ids[0],
        date=pytz.utc.localize(dateutil.parser.parse("1988-11-06 9:00:00")),
        anonymous=False)

    # Saves a couple of activities for another user using the new products
    for twin_id in twin_ids:
        activity = {
            "external_user_id": consumer,
            "external_product_id": twin_id,
            "activity": five_star_activity,
            "created_at": self.session_context.get_present_date(),
        }
        ut.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        if self.session_context.impressions_enabled:
            targets = [consumer]
        else:
            targets = None
        self.compare_incremental_vs_from_scratch(target_users=targets)
def test_out_boost(self):
    """ Tests the effect of applying an out-boost on recommendations for some
        activity types. It applies to all user-based heuristics.

        The top recommendation of the target user is recorded first. Then every
        template user that has a recent activity on that product gets a new
        out-boosted activity on it. After the session is refreshed, each score
        component of that product (skipping the algorithm number used by hybrid
        recommenders) must equal the old one multiplied by the out-boost.
    """
    target = "u_eco_2"
    session = tests.init_session(user_id=target, algorithm=self.algorithm)
    recommender = session.get_recommender()

    # Determines the index of the first actual value in the score tuples
    # produced by the recommender (note that hybrid recommenders use the first
    # position to indicate the algorithm number)
    if recommender.is_hybrid():
        start_index = 1
    else:
        start_index = 0

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    former_top_product = recommendations[0][1]
    old_strength = recommendations[0][0]

    # Meta-test: stops at the first activity type whose out-boost differs from 1.
    # If there is none, the loop variables keep the last pair seen (or the
    # defaults below when the mapping is empty) and the assertion fails.
    boost_activity_type = None
    out_boost = 1
    for boost_activity_type, out_boost in self.session_context.out_boost_by_activity.items():
        if out_boost != 1:
            break
    nose.tools.ok_(out_boost > 1, "Weak text fixture. There should be at least one out-boosted activity.")

    # Saves out-boosted activities for all templates who had consumed the former top product
    templates = [t[1] for t in session.user_templates]
    for template in templates:
        recent_product_activities_of_template = session.recent_activities_by_product_by_template_user.get(
            template, {})
        if former_top_product in recent_product_activities_of_template:
            activity = {"external_user_id": template,
                        "external_product_id": former_top_product,
                        "activity": boost_activity_type,
                        "created_at": session.get_present_date()}
            tasks.update_summaries(session, activity)

    session.refresh()

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    new_strength = None
    for rec in recommendations:
        if rec[1] == former_top_product:
            new_strength = rec[0]
            break

    nose.tools.ok_(new_strength is not None,
                   "The former top recommendation should have been recommended again.")

    for i in range(start_index, len(new_strength)):
        old_strength_value = old_strength[i]
        new_strength_value = new_strength[i]
        # The failure message used to say "in-boost", which pointed at the wrong feature
        nose.tools.ok_(abs(new_strength_value / old_strength_value - out_boost) < tests.FLOAT_DELTA,
                       "Incorrect application of the activity out-boost")
def test_multi_activities_blocking_vs_non_blocking(self):
    """ Checks that blocking activities prevent items from being recommended,
        and that non-blocking activities do not do so.

        For each "u_eco_<i>" user, the current top recommendation receives first a
        non-blocking activity (it must still be recommended afterwards) and then a
        blocking activity (it must no longer be recommended). The second check is
        only asserted when the filter strategy is `ctx.AFTER_SCORING`.
    """
    supported_activities = self.session_context.supported_activities
    blocking_activities = self.session_context.blocking_activities

    # Keeps the order of `supported_activities`, so that the activity picked below
    # does not depend on the iteration order of a set of strings.
    blocking_lookup = set(blocking_activities)
    non_blocking_activities = [activity_type for activity_type in supported_activities
                               if activity_type not in blocking_lookup]

    # Meta-tests (they do not depend on the user, so they run once, up front)
    nose.tools.ok_(len(non_blocking_activities) > 0,
                   "Weak test fixture. There should be at least one non_blocking activity")
    nose.tools.ok_(len(blocking_activities) > 0,
                   "Weak test fixture. There should be at least one blocking activity")

    # Economia
    for i in range(1, dp.N_USR_ECONOMIA + 1):
        target = "u_eco_" + str(i)

        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()
        recommendations = recommender.recommend(self.n_recommendations)

        # A failed assertion raises, so no fallback branch is needed afterwards
        nose.tools.ok_(len(recommendations) > 0, "Empty recommendation.")
        top_product = recommendations[0][1]

        # Saves a non-blocking activity first
        activity = {"external_user_id": target,
                    "external_product_id": top_product,
                    "activity": non_blocking_activities[0],
                    "created_at": self.session_context.get_present_date()}
        tasks.update_summaries(self.session_context, activity)

        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()
        recommendations = recommender.recommend(self.n_recommendations)
        recommended_products = [r[1] for r in recommendations]
        nose.tools.ok_(top_product in recommended_products,
                       "A non-blocking activity should not prevent a product from being recommended")

        # Now saves a blocking activity for the same product
        activity = {"external_user_id": target,
                    "external_product_id": top_product,
                    "activity": blocking_activities[0],
                    "created_at": self.session_context.get_present_date()}
        tasks.update_summaries(self.session_context, activity)

        session = tests.init_session(user_id=target, algorithm=self.algorithm)
        recommender = session.get_recommender()
        recommendations = recommender.recommend(self.n_recommendations)
        recommended_products = [r[1] for r in recommendations]
        if self.session_context.filter_strategy == ctx.AFTER_SCORING:
            nose.tools.ok_(top_product not in recommended_products,
                           "A blocking activity should prevent a product from being recommended")
def test_user_user_strengths_incremental_with_new_impressions_random(self):
    """ Checks that user x user strengths updated incrementally are the same as
        those computed from scratch, using random impressions and activities.

        Runs only when `tests.INCLUDE_RANDOM_TESTS` is set. In each of the one
        hundred rounds a random user gets impressions on two random products, the
        templates are regenerated, a random activity of that user is saved one
        second later, and the incremental result is compared.
    """
    if not tests.INCLUDE_RANDOM_TESTS:
        return

    candidate_users = list(self.db_proxy.fetch_all_user_ids())
    candidate_products = list(self.db_proxy.fetch_all_product_ids())
    first_date = pytz.utc.localize(dateutil.parser.parse("1988-11-06 6:00:00"))

    for round_number in range(100):
        # The order of the random draws is kept throughout the round, so that a
        # seeded run reproduces the same sequence.
        user = random.choice(candidate_users)
        is_anonymous = config.is_anonymous(user)
        print("user: %s" % user)

        # Saves a couple of impressions for the chosen user
        impressions_date = first_date + dt.timedelta(seconds=2 * round_number)
        product1 = random.choice(candidate_products)
        product2 = random.choice(candidate_products)
        for seen_product in (product1, product2):
            self.db_proxy.increment_impression_summary(user_id=user, product_id=seen_product,
                                                       date=impressions_date, anonymous=is_anonymous)
        print("impressions --> %s, %s" % (product1, product2))

        # it is important to regenerate from scratch (with all new impressions)
        ut.generate_templates(self.session_context)

        # Saves one activity for that same user
        product3 = random.choice(candidate_products)
        activity_type = random.choice(self.session_context.supported_activities)
        activity = {
            "external_user_id": user,
            "external_product_id": product3,
            "activity": activity_type,
            "created_at": first_date + dt.timedelta(seconds=2 * round_number + 1),
        }
        print("activity --> " + str(activity))

        ut.update_templates(self.session_context, activity)
        tasks.update_summaries(self.session_context, activity)

        if self.session_context.impressions_enabled:
            targets = [user]
        else:
            targets = None
        self.compare_incremental_vs_from_scratch(target_users=targets)
def test_unprocessed_base_product(self):
    """ Checks that the recommender still returns recommendations when the user
        has consumed a base product that was never processed (i.e., one that
        lacks a product model).
    """
    consumer = "u_tec_1"
    session = tests.init_session(user_id=consumer, algorithm=self.algorithm)

    purchase = {
        "external_user_id": consumer,
        "external_product_id": "unprocessed_product",
        "activity": "buy",
        "created_at": session.get_present_date(),
    }
    tasks.update_summaries(self.session_context, purchase)

    # The session must see the new activity before recommending
    session.refresh()
    recommendations = session.get_recommender().recommend(100)

    nose.tools.ok_(len(recommendations) > 0,
                   "Should have recommended even from an unprocessed base product")
def test_previous_consumption_factor():
    """ Checks the previous consumption factor reported by the session context.

        With `previous_consumption_factor` configured as 0.1, a product the user
        has just bought must report 0.1, whereas a product that received no
        activity in this test must report 1.
    """
    configured_factor = 0.1
    consumer = "******"
    session_context = tests.init_session(
        user_id=consumer,
        custom_settings={'previous_consumption_factor': configured_factor})

    purchase = {
        "external_user_id": consumer,
        "external_product_id": "p_eco_2",
        "activity": "buy",
        "created_at": session_context.get_present_date(),
    }
    tasks.update_summaries(session_context, purchase)
    session_context.refresh()

    untouched_factor = session_context.obtain_previous_consumption_factor("p_eco_1")
    nose.tools.eq_(untouched_factor, 1,
                   "Previous consumption factor should be 1 for non-consumed products")

    consumed_factor = session_context.obtain_previous_consumption_factor("p_eco_2")
    nose.tools.ok_(abs(consumed_factor - configured_factor) < tests.FLOAT_DELTA,
                   "Wrong previous consumption factor")
def test_product_product_strengths_incremental_new_user_3star(self):
    """ Checks that product x product strengths updated incrementally are the same
        as those computed from scratch after a single new activity.

        The activity type is taken from rating bucket 3 and is dated at the
        present date of the session context.
    """
    three_star_activity = {
        "external_user_id": "******",
        "external_product_id": "p_mus_1",
        "activity": self.session_context.activities_by_rating[3][0],
        "created_at": self.session_context.get_present_date(),
    }

    pt.update_templates(self.session_context, three_star_activity)
    tasks.update_summaries(self.session_context, three_star_activity)

    self.compare_incremental_vs_from_scratch()
def populate_activities(session_context, date=None):
    """ Registers one "buy" activity for every (user, product) pair found in the
        "activities" collection.

        :param session_context: session context handed over to `tasks.update_summaries`.
        :param date: date of the first activity of each record; when omitted, the
            present date of the session context minus one day is used. Within a
            record, consecutive products are dated one second apart.
    """
    records = _load_collection("activities")

    start_date = date
    if start_date is None:
        start_date = session_context.get_present_date() - dt.timedelta(days=1)

    for record in records:
        for offset, product_id in enumerate(record["products"]):
            tasks.update_summaries(session_context, {
                "external_user_id": record['user_id'],
                "external_product_id": product_id,
                "activity": "buy",
                "created_at": start_date + dt.timedelta(seconds=offset),
            })
def test_user_user_strengths_incremental_old_product_5_to_3star(self):
    """ Checks that user x user strengths updated incrementally are the same as
        those computed from scratch after one new activity on product "p_eco_2".

        The activity type is taken from rating bucket 3. When impressions are
        enabled, the comparison is restricted to the user who performed it.

        NOTE(review): the name implies the user had previously given this product
        a 5-star activity -- that depends on the fixture; confirm.
    """
    user = "******"
    downgrade = {
        "external_user_id": user,
        "external_product_id": "p_eco_2",
        "activity": self.session_context.activities_by_rating[3][0],
        "created_at": self.session_context.get_present_date(),
    }

    ut.update_templates(self.session_context, downgrade)
    tasks.update_summaries(self.session_context, downgrade)

    if self.session_context.impressions_enabled:
        targets = [user]
    else:
        targets = None
    self.compare_incremental_vs_from_scratch(target_users=targets)
def test_base_product_democracy(self):
    """ Checks that every base product gets to contribute to the final
        recommendation list.

        A new user buys the first product of three different types, one second
        apart. The top three recommendations must then carry those types (read
        from characters 2 to 4 of the product id) in reverse order of consumption.
    """
    user_id = "new_user"
    session = tests.init_session(user_id=user_id, algorithm=self.algorithm)

    product_types = ["esp", "tec", "eco"]
    for offset, consumed_type in enumerate(product_types):
        purchase = {
            "external_user_id": user_id,
            "external_product_id": "p_%s_1" % consumed_type,
            "activity": "buy",
            "created_at": session.get_present_date() + dt.timedelta(seconds=offset),
        }
        tasks.update_summaries(self.session_context, purchase)

    session.refresh()
    results = session.get_recommender().recommend(self.n_recommendations)

    for position, expected_type in enumerate(reversed(product_types)):
        recommended_id = results[position][1]
        nose.tools.eq_(recommended_id[2:5], expected_type,
                       "A product of type '%s' should have appeared at position %d in the list"
                       % (expected_type, position))
def test_recommend(self, test_recommendation_quality=True):
    """ Checks that the recommendations follow the top-popularity criterion.

        Every user but the target buys "p_TOP_POPULAR"; every one of them but
        "u_user_dummy" also buys "p_2ndTOP_POPULAR". The inherited test is run
        (with the quality check disabled) and then the target user must get those
        two products, in that order, as the first two recommendations.

        NOTE(review): `test_recommendation_quality` is not read by this override;
        the parent test is always called with `False`.
    """
    target = "u_user_empty"
    most_popular = "p_TOP_POPULAR"
    second_most_popular = "p_2ndTOP_POPULAR"

    for user in self.db_proxy.fetch_all_user_ids():
        if user == target:
            continue

        purchases = [most_popular]
        if user != "u_user_dummy":
            purchases.append(second_most_popular)

        for product in purchases:
            activity = {
                "external_user_id": user,
                "external_product_id": product,
                "activity": "buy",
                "created_at": self.session_context.get_present_date(),
            }
            tasks.update_summaries(self.session_context, activity)

    # Checks whether all users got recommendations
    super().test_recommend(test_recommendation_quality=False)

    # Checks whether the recommendations conform to the top-popularity criterion
    session = tests.init_session(user_id=target, algorithm=self.algorithm)
    recommendations = session.get_recommender().recommend(2)

    nose.tools.ok_(len(recommendations) > 0, "No recommendations were retrieved")
    nose.tools.eq_(recommendations[0][1], most_popular,
                   "Weird recommendation -- should be the most popular product")
    nose.tools.eq_(recommendations[1][1], second_most_popular,
                   "Weird recommendation -- should be the 2nd most popular product")
def test_increment_product_popularity(self):
    """ Checks how the popularity of a product changes as new activities of the
        same user on it are saved.

        After the initial sanity check, three "buy" activities are saved, dated
        2, 2 and 3 days before the present date; the popularity fetched after
        each of them must be 2, 2 and 4/3, respectively.
    """
    product_1 = "p_mus_1"
    product_2 = "p_empty"
    product_ids = [product_1, product_2]
    data_proxy = self.session_context.data_proxy

    # sanity check
    popularity_map = data_proxy.fetch_product_popularity(product_ids=product_ids)
    nose.tools.eq_(popularity_map[product_1], 3, "Wrong initial popularity")
    nose.tools.eq_(popularity_map.get(product_2), None,
                   "Popularity should be None since no one consumed it")

    # (days before the present date, popularity expected afterwards); the second
    # round does not extend the date range, the third one does
    rounds = (
        (2, 2),
        (2, 2),
        (3, 4/3),
    )

    for days_back, expected_popularity in rounds:
        activity = {
            "external_user_id": "u_eco_1",
            "external_product_id": product_1,
            "activity": "buy",
            "created_at": self.session_context.get_present_date() - dt.timedelta(days=days_back),
        }
        tasks.update_summaries(self.session_context, activity)

        popularity_map = self.session_context.data_proxy.fetch_product_popularity(product_ids=product_ids)
        nose.tools.ok_(abs(popularity_map[product_1] - expected_popularity) < tests.FLOAT_DELTA,
                       "Wrong popularity")
def test_in_boost(self):
    """ Checks the effect of the in-boost of an activity type on recommendations,
        first alone and then combined with the history decay.

        With history decay disabled, the target user performs an in-boosted
        activity on the current top recommendation; each score component of that
        product must then equal the old one times the in-boost. Next, two
        impressions are saved for that product and a session with an exponential
        history decay (half-life 2) is created; each score component must then
        equal the old one times half the in-boost.
    """
    target = "u_eco_2"
    no_decay = {'history_decay_function_name': None}
    session = tests.init_session(user_id=target, custom_settings=no_decay, algorithm=self.algorithm)
    recommender = session.get_recommender()

    # Hybrid recommenders use the first position of the score tuples to indicate
    # the algorithm number, so the actual values start one position later
    start_index = 1 if recommender.is_hybrid() else 0

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    old_strength, former_top_product = recommendations[0][0], recommendations[0][1]

    # Meta-test: stops at the first activity type whose in-boost differs from 1
    boost_activity_type = None
    in_boost = 1
    for boost_activity_type, in_boost in self.session_context.in_boost_by_activity.items():
        if in_boost != 1:
            break
    nose.tools.ok_(in_boost > 1, "Weak text fixture. There should be at least one in-boosted activity.")

    boosted_activity = {
        "external_user_id": target,
        "external_product_id": former_top_product,
        "activity": boost_activity_type,
        "created_at": self.session_context.get_present_date(),
    }
    tasks.update_summaries(self.session_context, boosted_activity)
    session.refresh()

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    new_strength = next((rec[0] for rec in recommendations if rec[1] == former_top_product), None)
    nose.tools.ok_(new_strength is not None,
                   "The former top recommendation should have been recommended again.")

    for position in range(start_index, len(new_strength)):
        ratio = new_strength[position] / old_strength[position]
        nose.tools.ok_(abs(ratio - in_boost) < tests.FLOAT_DELTA,
                       "Incorrect application of the activity in-boost")

    # Saves two impressions of the target user on the former top product
    for _ in range(2):
        self.db_proxy.increment_impression_summary(user_id=target, product_id=former_top_product,
                                                   date=self.session_context.get_present_date(),
                                                   anonymous=False)

    exponential_decay = {'history_decay_function_name': 'exponential',
                         'history_decay_exponential_function_halflife': 2}
    session = tests.init_session(user_id=target, custom_settings=exponential_decay,
                                 algorithm=self.algorithm)
    recommender = session.get_recommender()

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    new_strength = next((rec[0] for rec in recommendations if rec[1] == former_top_product), None)
    nose.tools.ok_(new_strength is not None,
                   "The former top recommendation should have been recommended again.")

    for position in range(start_index, len(new_strength)):
        ratio = new_strength[position] / old_strength[position]
        nose.tools.ok_(abs(ratio - in_boost / 2) < tests.FLOAT_DELTA,
                       "Incorrect application of the in-boost and history decay together")
def test_near_identical(self):
    """ Checks that two 'near-identical' products are not recommended next to
        each other once near-identical filtering is switched on.

        Two products with the same content and similar titles are inserted and
        consumed by every user but the target. Without filtering (and for
        non-hybrid recommenders only) the twins are expected to show up in
        consecutive positions; with filtering on `resources.title`, any two twins
        in the list must be more than one position apart.
    """
    target = "u_tec_1"
    id_twin_product_1 = "p_tec_TWIN_1"
    id_twin_product_2 = "p_tec_TWIN_2"
    twin_ids = (id_twin_product_1, id_twin_product_2)
    twin_titles = ("Whatever Gets You Through The Night",
                   "Whatever Gets You Through This Night is Alright")

    date = self.session_context.get_present_date() - dt.timedelta(days=1)

    for twin_id, title in zip(twin_ids, twin_titles):
        self.db_proxy.insert_product({
            "external_id": twin_id,
            "language": "english",
            "date": date,
            "expiration_date": date + dt.timedelta(days=30),
            "resources": {"title": title},
            "full_content": "Begin. Technology. Technology. This is all we got. End.",
            "category": "Nonsense",
        })
        tasks.process_product(self.session_context, twin_id)

    # makes it so that all users consume (and have impressions on) the twins, except for the target user
    for user in self.db_proxy.fetch_all_user_ids():
        if user == target:
            continue

        for twin_id in twin_ids:
            activity = {
                "external_user_id": user,
                "external_product_id": twin_id,
                "activity": "buy",
                "created_at": self.session_context.get_present_date(),
            }
            tasks.update_summaries(self.session_context, activity)

        if self.session_context.impressions_enabled:
            is_anonymous = config.is_anonymous(user)
            for twin_id in twin_ids:
                self.db_proxy.increment_impression_summary(user, twin_id,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=is_anonymous)

    # Unfortunately we need to regenerate from scratch,
    # otherwise the df's of the twins will be different.
    ut.generate_templates(self.session_context)
    pt.generate_templates(self.session_context)
    pttfidf.generate_templates(self.session_context)

    # First, we recommend WITHOUT near-identical filtering, to check that the twins really appear consecutively.
    unfiltered_settings = {'near_identical_filter_field': None,
                           'near_identical_filter_threshold': None}
    session = tests.init_session(user_id=target, custom_settings=unfiltered_settings,
                                 algorithm=self.algorithm)
    session.refresh()
    recommender = session.get_recommender()

    if not recommender.is_hybrid():
        # For hybrid recommenders, this check is meaningless.
        recommendations = recommender.recommend(100)
        first_twin_position = -1
        for position, recommendation in enumerate(recommendations):
            if not recommendation[1].startswith("p_tec_TWIN_"):
                continue
            if first_twin_position >= 0:
                nose.tools.eq_(position - first_twin_position, 1,
                               "The two near-identical products should appear consecutively without filtering")
                break
            first_twin_position = position

    # Now we recommend WITH near-identical filtering
    recommendation_page_size = 5
    filtered_settings = {'near_identical_filter_field': 'resources.title',
                         'near_identical_filter_threshold': 2,
                         'recommendations_page_size': recommendation_page_size}
    session = tests.init_session(user_id=target, custom_settings=filtered_settings,
                                 algorithm=self.algorithm)
    session.refresh()
    recommender = session.get_recommender()
    recommendations = recommender.recommend(100)

    # Sanity check
    recommended_products = {r[1] for r in recommendations}
    count_recommended_twins = len({id_twin_product_1, id_twin_product_2} & recommended_products)
    nose.tools.ok_(count_recommended_twins > 0,
                   "At least one of the twins should have been recommended, otherwise the test is meaningless")

    # Actual tests
    previous_twin_position = -1 * recommendation_page_size - 1  # initial value, so the first twin passes the test
    for position, recommendation in enumerate(recommendations):
        if recommendation[1].startswith("p_tec_TWIN_"):
            # it suffices to show that the twins have been separated
            nose.tools.ok_(position - previous_twin_position > 1,
                           "Two near-identical products should not appear within the same recommendations page")
            previous_twin_position = position
def test_product_age_decay_exponential(self):
    """ Tests the effect of applying a product age decay factor based on an
        exponential function on recommendations. It applies to all recommendation
        heuristics.

        Two products that differ only in id and dates are inserted, the older one
        being 2 days (the configured half-life) older than the other. Every user
        but the target consumes both. With near-identical filtering disabled,
        each score component of the old twin must be half that of the new twin.
    """
    target = "u_tec_1"

    id_twin_product_old = "p_tec_TWIN_OLD"
    id_twin_product_new = "p_tec_TWIN_NEW"

    # makes it so that the oldest twin is 2 days (the configured half life) older
    old_date = self.session_context.get_present_date() - dt.timedelta(days=2)
    new_date = self.session_context.get_present_date()

    twin_product_old = {"external_id": id_twin_product_old,
                        "language": "english",
                        "date": old_date,
                        "expiration_date": old_date + dt.timedelta(days=30),
                        "resources": {"title": "Whatever Gets You Through The Night"},
                        "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                        "category": "Nonsense"}

    twin_product_new = {"external_id": id_twin_product_new,
                        "language": "english",
                        "date": new_date,
                        "expiration_date": new_date + dt.timedelta(days=30),
                        "resources": {"title": "Whatever Gets You Through The Night"},
                        "full_content": """Begin. Technology. Technology. This is all we got. End.""",
                        "category": "Nonsense"}

    self.db_proxy.insert_product(twin_product_old)
    tasks.process_product(self.session_context, id_twin_product_old)
    self.db_proxy.insert_product(twin_product_new)
    tasks.process_product(self.session_context, id_twin_product_new)

    # makes it so that all users consume (and have impressions on) the twins, except for the target user
    users = self.db_proxy.fetch_all_user_ids()
    for user in users:
        if user != target:
            activity = {"external_user_id": user,
                        "external_product_id": id_twin_product_old,
                        "activity": "buy",
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            activity = {"external_user_id": user,
                        "external_product_id": id_twin_product_new,
                        "activity": "buy",
                        "created_at": self.session_context.get_present_date()}
            tasks.update_summaries(self.session_context, activity)

            if self.session_context.impressions_enabled:
                is_anonymous = config.is_anonymous(user)
                self.db_proxy.increment_impression_summary(user, id_twin_product_old,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=is_anonymous)
                self.db_proxy.increment_impression_summary(user, id_twin_product_new,
                                                           date=self.session_context.get_present_date(),
                                                           anonymous=is_anonymous)

    # Unfortunately we need to regenerate from scratch,
    # otherwise the df's of the twins will be different.
    ut.generate_templates(self.session_context)
    pt.generate_templates(self.session_context)
    pttfidf.generate_templates(self.session_context)

    custom_settings = {'product_age_decay_function_name': 'exponential',
                       'product_age_decay_exponential_function_halflife': 2,
                       'near_identical_filter_field': None,
                       'near_identical_filter_threshold': None}  # Disables near-identical filtering
    session = tests.init_session(user_id=target, custom_settings=custom_settings, algorithm=self.algorithm)
    session.refresh()
    recommender = session.get_recommender()

    # Determines the index of the first actual value in the score tuples
    # produced by the recommender (note that hybrid recommenders use the first
    # position to indicate the algorithm number)
    if recommender.is_hybrid():
        start_index = 1
    else:
        start_index = 0

    recommendations = recommender.recommend(100)
    nose.tools.ok_(len(recommendations) > 0, "No recommendations were returned!")

    strength_old_twin = None
    strength_new_twin = None
    for rec in recommendations:
        if rec[1] == id_twin_product_old:
            strength_old_twin = rec[0]
        if rec[1] == id_twin_product_new:
            strength_new_twin = rec[0]

    # Without these guards, a missing twin would surface as an opaque TypeError
    # in the loop below instead of as a test failure that names the problem.
    nose.tools.ok_(strength_old_twin is not None,
                   "The old twin should have been recommended, otherwise the test is meaningless")
    nose.tools.ok_(strength_new_twin is not None,
                   "The new twin should have been recommended, otherwise the test is meaningless")

    for i in range(start_index, len(strength_old_twin)):
        old_strength_value = strength_old_twin[i]
        new_strength_value = strength_new_twin[i]
        nose.tools.ok_(abs(old_strength_value / new_strength_value - 0.5) < tests.FLOAT_DELTA,
                       "Incorrect application of the product age decay")