Exemplo n.º 1
0
    def test_fit(self):
        """Compare NumberNormalizer.fit output against reference values.

        The same score list is normalized twice: once with a default
        NumberNormalizer and once with ``decimal_rounding=4``. Each result is
        compared element by element with ``assertAlmostEqual``.
        """
        raw_scores = [1, 2, 5, 5, 3, 3.5, 3.6, 3.7, 3.8, 3.9, 4.0, 10]

        # Each case pairs the constructor keyword arguments with the values
        # expected from fit(). Normalizers are built inside the loop so the
        # second one is only created after the first case has been checked.
        cases = (
            (
                {},
                [
                    -1.0, -0.77777777, -0.11111111, -0.11111111, -0.55555555,
                    -0.44444444, -0.42222222, -0.39999999, -0.37777777,
                    -0.35555555, -0.33333333, 1.0
                ],
            ),
            (
                # Rounding at the fourth digit
                {'decimal_rounding': 4},
                [
                    -1.0, -0.7778, -0.1111, -0.1111, -0.5556, -0.4444,
                    -0.4222, -0.4, -0.3778, -0.3556, -0.3333, 1.0
                ],
            ),
        )

        for normalizer_kwargs, reference in cases:
            normalized = NumberNormalizer(**normalizer_kwargs).fit(raw_scores)
            for wanted, obtained in zip(reference, normalized):
                self.assertAlmostEqual(wanted, obtained)
    def test_graph_creation_exo_missing(self):
        """Build NXFullGraph with exo representations/properties that do not exist.

        For every combination the graph must still contain user and item
        nodes, and must contain no property nodes.
        """
        # Import ratings as DataFrame
        importer = RatingsImporter(source=CSVFile(ratings_filename),
                                   from_id_column='user_id',
                                   to_id_column='item_id',
                                   score_column='points',
                                   timestamp_column='timestamp',
                                   score_processor=NumberNormalizer())
        frame = importer.import_ratings()

        # Keyword sets naming exo data that is not present in the contents.
        missing_exo_kwargs = [
            # Non-existent exo_properties only
            {
                'item_exo_properties': ['asdds', 'dsdds'],
                'user_exo_properties': ['vvvv'],
            },
            # Non-existent exo_representations only
            {
                'item_exo_representation': "asdsa",
                'user_exo_representation': "dsdssd",
            },
            # Non-existent exo_representations and non-existent exo_properties
            {
                'user_exo_representation': 'not_exist',
                'item_exo_representation': 'not_Exist2',
                'item_exo_properties': ["asdsa"],
                'user_exo_properties': ["dsdssd"],
            },
        ]

        for exo_kwargs in missing_exo_kwargs:
            graph = NXFullGraph(source_frame=frame,
                                item_contents_dir=movies_dir,
                                user_contents_dir=user_dir,
                                **exo_kwargs)

            # Simple asserts just to make sure the graph is created
            self.assertGreater(len(graph.user_nodes), 0)
            self.assertGreater(len(graph.item_nodes), 0)
            self.assertEqual(len(graph.property_nodes), 0)
Exemplo n.º 3
0
    def test_graph_creation(self):
        """Build NXFullGraph in several ways and check which node sets are filled."""

        def check_nodes(graph, expect_properties):
            # User and item nodes must always exist; property nodes must exist
            # only when exo data was requested.
            self.assertGreater(len(graph.user_nodes), 0)
            self.assertGreater(len(graph.item_nodes), 0)
            if expect_properties:
                self.assertGreater(len(graph.property_nodes), 0)
            else:
                self.assertEqual(len(graph.property_nodes), 0)

        # Import ratings as DataFrame
        points_config = RatingsFieldConfig(
            field_name='points',
            processor=NumberNormalizer(min_=1, max_=5))
        importer = RatingsImporter(source=CSVFile(ratings_filename),
                                   rating_configs=[points_config],
                                   from_field_name='user_id',
                                   to_field_name='item_id',
                                   timestamp_field_name='timestamp')
        frame = importer.import_ratings()

        # No representation given: 'producer' and 'starring' are taken from
        # all exo representations, since several representations can contain
        # the same properties. '1' is the column in the users DAT which
        # identifies the gender.
        by_properties = NXFullGraph(
            source_frame=frame,
            item_contents_dir=movies_dir,
            user_contents_dir=user_dir,
            item_exo_properties=['producer', 'starring'],
            user_exo_properties=['1'],
        )
        check_nodes(by_properties, True)

        # No properties given: ALL exo properties of representation 0 are
        # retrieved.
        by_representation = NXFullGraph(
            source_frame=frame,
            item_contents_dir=movies_dir,
            user_contents_dir=user_dir,
            item_exo_representation="0",
            user_exo_representation="0",
        )
        check_nodes(by_representation, True)

        # Ratings only, without any exo data
        ratings_only = NXFullGraph(frame)
        check_nodes(ratings_only, False)
Exemplo n.º 4
0
    def test_graph_creation(self):
        """Build NXTripartiteGraph in several ways and check which node sets are filled."""
        # Import ratings as DataFrame
        importer = RatingsImporter(
            source=CSVFile(ratings_filename),
            rating_configs=[
                RatingsFieldConfig(field_name='points',
                                   processor=NumberNormalizer(min_=1, max_=5)),
            ],
            from_field_name='user_id',
            to_field_name='item_id',
            timestamp_field_name='timestamp',
        )
        frame = importer.import_ratings()

        # (extra positional args, keyword args, property nodes expected?)
        scenarios = [
            # Property 'starring' from representation '0'
            ((movies_dir,),
             {'item_exo_representation': "0",
              'item_exo_properties': ['starring']},
             True),
            # Only the exo representation
            ((movies_dir,), {'item_exo_representation': "0"}, True),
            # Only the exo properties
            ((movies_dir,), {'item_exo_properties': ['starring']}, True),
            # Ratings only, without any exo data
            ((), {}, False),
        ]

        for extra_args, exo_kwargs, expects_properties in scenarios:
            graph = NXTripartiteGraph(frame, *extra_args, **exo_kwargs)

            # Simple asserts just to make sure the graph is created
            self.assertGreater(len(graph.user_nodes), 0)
            self.assertGreater(len(graph.item_nodes), 0)
            if expects_properties:
                self.assertGreater(len(graph.property_nodes), 0)
            else:
                self.assertEqual(len(graph.property_nodes), 0)
 def test_fit(self):
     """Check NumberNormalizer.fit on single values against fixed expectations.

     Each case builds a NumberNormalizer from two positional arguments,
     normalizes one value and compares the result to three decimal places.
     """
     # (constructor positional args, value passed to fit, expected result)
     cases = (
         ((-10, -5), -6, .6),
         ((-5, 4), 0.5, 0.222),
         ((0, 5), 2, -0.2),
         ((1, 5), 2, -0.5),
         ((-7, 0), -6, -0.714),
         ((0, 10), 0.5, -0.9),
         ((0, 10), 11, 10),
         ((0, 10), -1, 0),
     )
     for ctor_args, raw_value, expected in cases:
         normalized = NumberNormalizer(*ctor_args).fit(raw_value)
         self.assertAlmostEqual(normalized, expected, places=3)
Exemplo n.º 6
0
    def test_graph_creation(self):
        """Build NXTripartiteGraph in several ways and check which node sets are filled."""
        # Import ratings as DataFrame
        importer = RatingsImporter(
            source=CSVFile(ratings_filename),
            from_id_column='user_id',
            to_id_column='item_id',
            score_column='points',
            timestamp_column='timestamp',
            score_processor=NumberNormalizer(),
        )
        frame = importer.import_ratings()

        # Node collections that must be non-empty when exo data is requested.
        filled_when_exo = ('user_nodes', 'item_nodes', 'property_nodes')

        # Property 'starring' from representation '0' ('dbpedia')
        graph = NXTripartiteGraph(
            frame,
            movies_dir,
            item_exo_representation=0,
            item_exo_properties=['starring'],
        )
        for node_attr in filled_when_exo:
            self.assertGreater(len(getattr(graph, node_attr)), 0)

        # Only the exo representation, addressed by name
        graph = NXTripartiteGraph(
            frame,
            movies_dir,
            item_exo_representation="dbpedia",
        )
        for node_attr in filled_when_exo:
            self.assertGreater(len(getattr(graph, node_attr)), 0)

        # Only the exo properties
        graph = NXTripartiteGraph(
            frame,
            movies_dir,
            item_exo_properties=['starring'],
        )
        for node_attr in filled_when_exo:
            self.assertGreater(len(getattr(graph, node_attr)), 0)

        # Ratings only: user and item nodes exist, property nodes do not
        graph = NXTripartiteGraph(frame)
        for node_attr in ('user_nodes', 'item_nodes'):
            self.assertGreater(len(getattr(graph, node_attr)), 0)
        self.assertEqual(len(graph.property_nodes), 0)
Exemplo n.º 7
0
 def test_import_ratings(self):
     RatingsImporter(
         source=JSONFile(file_path=file_path),
         output_directory="test_ratings",
         rating_configs=[
             RatingsFieldConfig(field_name="review_title",
                                processor=TextBlobSentimentAnalysis()),
             RatingsFieldConfig(field_name="text",
                                processor=TextBlobSentimentAnalysis()),
             RatingsFieldConfig(field_name="stars",
                                processor=NumberNormalizer(min_=0, max_=5))
         ],
         from_field_name="user_id",
         to_field_name="item_id",
         timestamp_field_name="timestamp").import_ratings()
     """
Exemplo n.º 8
0
    def test_import_ratings_w_score_processor(self):
        """Import ratings through a NumberNormalizer and validate the result.

        The imported frame must expose exactly the columns ``from_id``,
        ``to_id`` and ``score``, and every score must lie in ``[-1, 1]``.
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4,
                             score_processor=NumberNormalizer())

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        score_result = list(ratings['score'])

        # Assert each score on its own: a bare generator expression handed to
        # assertTrue is always truthy, so the range would never be checked.
        for score in score_result:
            self.assertTrue(-1 <= score <= 1,
                            msg="score {!r} is outside [-1, 1]".format(score))
    def test_import_ratings(self):
        """Import ratings from the JSON test dataset with three field processors.

        The dataset is first looked up three directories above the working
        directory; if that file is missing, the path relative to the working
        directory is used instead. The chosen path is printed.
        """
        json_path = '../../../datasets/test_import_ratings.json'
        fallback_path = 'datasets/test_import_ratings.json'
        try:
            # Opened only to find out whether the file exists.
            with open(json_path):
                pass
        except FileNotFoundError:
            json_path = fallback_path

        print(json_path)

        source = JSONFile(file_path=json_path)
        title_config = RatingsFieldConfig(
            preference_field_name="review_title",
            processor=TextBlobSentimentAnalysis())
        text_config = RatingsFieldConfig(
            preference_field_name="text",
            processor=TextBlobSentimentAnalysis())
        stars_config = RatingsFieldConfig(
            preference_field_name="stars",
            processor=NumberNormalizer(min_=0, max_=5))

        importer = RatingsImporter(
            source=source,
            output_directory="test_ratings",
            rating_configs=[title_config, text_config, stars_config],
            from_field_name="user_id",
            to_field_name="item_id",
            timestamp_field_name="timestamp",
        )
        importer.import_ratings()
Exemplo n.º 10
0
from orange_cb_recsys.recsys.config import RecSysConfig

# Absolute directory containing this module; the fixture paths below are built
# from it.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Content and dataset roots, two directory levels above THIS_DIR.
contents_path = os.path.join(THIS_DIR, "../../contents")
datasets_path = os.path.join(THIS_DIR, "../../datasets")
# CSV file the ratings below are imported from.
ratings_filename = os.path.join(datasets_path, "examples/new_ratings.csv")
# Directories handed to RecSysConfig as users_directory / items_directory.
users_dir = os.path.join(contents_path,
                         "examples/ex_1/users_1600355755.1935306")
items_dir = os.path.join(contents_path,
                         "examples/ex_1/movies_1600355972.49884")

# NOTE: this import runs when the module is loaded, not inside a test.
# The 'points' field is processed by NumberNormalizer(min_=1, max_=5);
# 'user_id', 'item_id' and 'timestamp' are given as the from/to/timestamp
# field names.
t_ratings = RatingsImporter(
    source=CSVFile(ratings_filename),
    rating_configs=[
        RatingsFieldConfig(field_name='points',
                           processor=NumberNormalizer(min_=1, max_=5))
    ],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
).import_ratings()


class TestRankingEvalModel(TestCase):
    def test_fit(self):

        recsys_config = RecSysConfig(users_directory=users_dir,
                                     items_directory=items_dir,
                                     score_prediction_algorithm=None,
                                     ranking_algorithm=ClassifierRecommender(
                                         {'Plot': '0'}, SVM()),
Exemplo n.º 11
0
    def test_fit(self):
        """Run RankingAlgEvalModel.fit on the example ratings with a CentroidVector ranker.

        Ratings are imported from ``datasets/examples/new_ratings.csv`` and
        printed. The evaluation is then run with K-fold partitioning and a
        list of ranking metrics. TypeError and ValueError raised while
        building or fitting the evaluation model are swallowed, so only other
        exception types make this test fail.
        """
        csv_path = 'datasets/examples/new_ratings.csv'
        importer = RatingsImporter(
            source=CSVFile(csv_path),
            rating_configs=[
                RatingsFieldConfig(field_name='points',
                                   processor=NumberNormalizer(min_=1, max_=5)),
            ],
            from_field_name='user_id',
            to_field_name='item_id',
            timestamp_field_name='timestamp',
        )
        ratings = importer.import_ratings()
        print(ratings)

        ranker = CentroidVector(item_field='Plot',
                                field_representation='1',
                                similarity=CosineSimilarity())
        recsys_config = RecSysConfig(
            users_directory='contents/examples/ex_1/users_1600355755.1935306',
            items_directory='contents/examples/ex_1/movies_1600355972.49884',
            score_prediction_algorithm=None,
            ranking_algorithm=ranker,
            rating_frame=ratings,
        )

        try:
            # Partitioning and metrics are built inside the try block so that
            # errors raised by their constructors are swallowed as well.
            partitioning = KFoldPartitioning()
            metrics = [
                Precision(0.4),
                Recall(0.4),
                FNMeasure(1, 0.4),
                MRR(0.4),
                NDCG({0: (-1, 0), 1: (0, 1)}),
                Correlation('pearson'),
                Correlation('kendall'),
                Correlation('spearman'),
            ]
            eval_model = RankingAlgEvalModel(config=recsys_config,
                                             partitioning=partitioning,
                                             metric_list=metrics)
            eval_model.fit()
        except (TypeError, ValueError):
            pass