def setUp(self):
    """Build the recommender under test around a MagicMock broker."""
    broker_stub = MagicMock()
    recommender = SommelierRecommenderBase(b=broker_stub)
    self.recommender = recommender
class RecommenderTest(unittest.TestCase): dummy_tastings = [ { "author_id": 1, "wine_id": 5, "rating": 9 }, { "author_id": 2, "wine_id": 6, "rating": 10 }, { "author_id": 3, "wine_id": 7, "rating": 11 }, { "author_id": 4, "wine_id": 8, "rating": 12 }] expected_preferences_1 = { 1: [ 0, 9, 0, 0 ], 2: [ 0, 0, 10, 0 ], 3: [ 0, 0, 0, 11 ], 4: [ 12, 0, 0, 0 ] }, [8, 5, 6, 7] expected_preferences_2 = { 5: [ 9, 0, 0, 0 ], 6: [ 0, 10, 0, 0 ], 7: [ 0, 0, 11, 0 ], 8: [ 0, 0, 0, 12 ] }, [1, 2, 3, 4] pearson_r_test_preferences = { "row_1": [ 1, 2, 3, 4, 5 ], "row_2": [ 2, 3, 4, 5, 6 ], "row_3": [ 5, 4, 3, 2, 1 ], "row_4": [ 3, 3, 3, 3, 3 ], "row_5": [ 1, 2 ], "row_6": [ 3, 1 ]} dummy_line_tab_separated = "1\t2\t3\t4" dummy_line_colon_separated = "1::2::3::4" dummy_line_invalid_separator = "1BAZ2BAZ3BAZ4" dummy_line_invalid_values = "Should::NOT::be:Strings" expected_tasting = {'author_id': 1, 'rating': 3, 'tasting_date': '1970-01-01 01:00:04', 'wine_id': 2} expected_movielens_lines_colon = [ "1::5::9::0", "2::6::10::0", "3::7::11::0", "4::8::12::0" ] expected_movielens_lines_tab = [ "1\t5\t9\t0", "2\t6\t10\t0", "3\t7\t11\t0", "4\t8\t12\t0" ] dummy_matrix_a = [[1,1,1,1],[2,2,2,2],[3,3,3,3]] dummy_matrix_b = [[1,1,1,1],[2,4,4,2],[3,3,3,1]] dummy_matrix_c = [[0,2,0,2],[0,2,0,2],[2,2,2,2]] expected_mae_a_b = ((0.0,1.0,0.5),0.5) expected_mae_a_c = ((1.0,1.0,1.0),1.0) def setUp(self): mock_broker = MagicMock() self.recommender = SommelierRecommenderBase(b=mock_broker) def test_pearson_r(self): # rows 1 and 2 have 100% positive correlation self.assertEqual(1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_2')) # rows 1 and 3 have 100% negative correlation self.assertEqual(-1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_3')) # row 4 has a standard deviation of 0, which we need to deal with to avoid a division by zero error # when the covariance is divided by the product of the standard deviations # in this case the method 
returns 0.0, on the basis that a user submitting the same rating for every # item is not expressing any preference at all, so a neutral similarity score is appropriate self.assertEqual(0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_4')) # cases where there are two or less items for comparison are problematic for pearson_r; any two # lists with 2 items will always result in either 1.0 or -1.0. That is not a useful score, as # there may be no similarity in the ratings at all, so we return 0.0 if there are < 3 items self.assertEqual(0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_5', 'row_6')) def test_preferences(self): # preferences formatting for author rows / wine columns self.assertEqual(self.expected_preferences_1, self.recommender.preferences(self.dummy_tastings, 'author_id', 'wine_id')) # preferences formatting for wine rows / author columns self.assertEqual(self.expected_preferences_2, self.recommender.preferences(self.dummy_tastings, 'wine_id', 'author_id')) # Movielens data can be encoded with either tabs or double colons (::), so test for both and neither... 
def test_movielens_line_to_tasting(self): self.assertEqual(self.expected_tasting, self.recommender.movielens_line_to_tasting(self.dummy_line_tab_separated)) self.assertEqual(self.expected_tasting, self.recommender.movielens_line_to_tasting(self.dummy_line_colon_separated)) self.assertRaises(Exception, lambda _: self.recommender.movielens_line_to_tasting(self.dummy_line_invalid_separator)) self.assertRaises(Exception, lambda _: self.recommender.movielens_line_to_tasting(self.dummy_line_invalid_values)) # tastings_to_movielens_format() should use double colon (::) separator by default def test_tastings_to_movielens_format(self): self.assertEqual(self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings)) self.assertEqual(self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="::")) self.assertEqual(self.expected_movielens_lines_tab, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="\t")) def test_evaluate_matrices(self): self.assertEquals(self.expected_mae_a_b, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_b)) self.assertEquals(self.expected_mae_a_c, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_c))
class RecommenderTest(unittest.TestCase): dummy_tastings = [{ "author_id": 1, "wine_id": 5, "rating": 9 }, { "author_id": 2, "wine_id": 6, "rating": 10 }, { "author_id": 3, "wine_id": 7, "rating": 11 }, { "author_id": 4, "wine_id": 8, "rating": 12 }] expected_preferences_1 = { 1: [0, 9, 0, 0], 2: [0, 0, 10, 0], 3: [0, 0, 0, 11], 4: [12, 0, 0, 0] }, [8, 5, 6, 7] expected_preferences_2 = { 5: [9, 0, 0, 0], 6: [0, 10, 0, 0], 7: [0, 0, 11, 0], 8: [0, 0, 0, 12] }, [1, 2, 3, 4] pearson_r_test_preferences = { "row_1": [1, 2, 3, 4, 5], "row_2": [2, 3, 4, 5, 6], "row_3": [5, 4, 3, 2, 1], "row_4": [3, 3, 3, 3, 3], "row_5": [1, 2], "row_6": [3, 1] } dummy_line_tab_separated = "1\t2\t3\t4" dummy_line_colon_separated = "1::2::3::4" dummy_line_invalid_separator = "1BAZ2BAZ3BAZ4" dummy_line_invalid_values = "Should::NOT::be:Strings" expected_tasting = { 'author_id': 1, 'rating': 3, 'tasting_date': '1970-01-01 01:00:04', 'wine_id': 2 } expected_movielens_lines_colon = [ "1::5::9::0", "2::6::10::0", "3::7::11::0", "4::8::12::0" ] expected_movielens_lines_tab = [ "1\t5\t9\t0", "2\t6\t10\t0", "3\t7\t11\t0", "4\t8\t12\t0" ] dummy_matrix_a = [[1, 1, 1, 1], [2, 2, 2, 2], [3, 3, 3, 3]] dummy_matrix_b = [[1, 1, 1, 1], [2, 4, 4, 2], [3, 3, 3, 1]] dummy_matrix_c = [[0, 2, 0, 2], [0, 2, 0, 2], [2, 2, 2, 2]] expected_mae_a_b = ((0.0, 1.0, 0.5), 0.5) expected_mae_a_c = ((1.0, 1.0, 1.0), 1.0) def setUp(self): mock_broker = MagicMock() self.recommender = SommelierRecommenderBase(b=mock_broker) def test_pearson_r(self): # rows 1 and 2 have 100% positive correlation self.assertEqual( 1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_2')) # rows 1 and 3 have 100% negative correlation self.assertEqual( -1.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_3')) # row 4 has a standard deviation of 0, which we need to deal with to avoid a division by zero error # when the covariance is divided by the product of the standard deviations # in this 
case the method returns 0.0, on the basis that a user submitting the same rating for every # item is not expressing any preference at all, so a neutral similarity score is appropriate self.assertEqual( 0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_1', 'row_4')) # cases where there are two or less items for comparison are problematic for pearson_r; any two # lists with 2 items will always result in either 1.0 or -1.0. That is not a useful score, as # there may be no similarity in the ratings at all, so we return 0.0 if there are < 3 items self.assertEqual( 0.0, self.recommender.pearson_r(self.pearson_r_test_preferences, 'row_5', 'row_6')) def test_preferences(self): # preferences formatting for author rows / wine columns self.assertEqual( self.expected_preferences_1, self.recommender.preferences(self.dummy_tastings, 'author_id', 'wine_id')) # preferences formatting for wine rows / author columns self.assertEqual( self.expected_preferences_2, self.recommender.preferences(self.dummy_tastings, 'wine_id', 'author_id')) # Movielens data can be encoded with either tabs or double colons (::), so test for both and neither... 
def test_movielens_line_to_tasting(self): self.assertEqual( self.expected_tasting, self.recommender.movielens_line_to_tasting( self.dummy_line_tab_separated)) self.assertEqual( self.expected_tasting, self.recommender.movielens_line_to_tasting( self.dummy_line_colon_separated)) self.assertRaises( Exception, lambda _: self.recommender.movielens_line_to_tasting( self.dummy_line_invalid_separator)) self.assertRaises( Exception, lambda _: self.recommender.movielens_line_to_tasting( self.dummy_line_invalid_values)) # tastings_to_movielens_format() should use double colon (::) separator by default def test_tastings_to_movielens_format(self): self.assertEqual( self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings)) self.assertEqual( self.expected_movielens_lines_colon, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="::")) self.assertEqual( self.expected_movielens_lines_tab, self.recommender.tastings_to_movielens_format(self.dummy_tastings, separator="\t")) def test_evaluate_matrices(self): self.assertEquals( self.expected_mae_a_b, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_b)) self.assertEquals( self.expected_mae_a_c, self.recommender.recsys_evaluate_matrices(self.dummy_matrix_a, self.dummy_matrix_c))