Exemplo n.º 1
0
    def test_add_score_column(self):
        """Check that add_score_column appends a float copy of 'stars'.

        The importer is built with positional column indexes, the base
        ratings are imported, and then the 'stars' field is added a second
        time under the name 'score_duplicate'. The column layout and the
        values of the added column are verified on the frame returned by
        add_score_column.
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4)

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        ratings_w_added_column = ri.add_score_column(
            'stars', column_name='score_duplicate')

        expected_columns = ['from_id', 'to_id', 'score', 'score_duplicate']
        result_columns = list(ratings_w_added_column.columns)

        self.assertEqual(expected_columns, result_columns)

        # Read the new column from the frame returned by add_score_column
        # (the same frame whose column layout was just checked) rather than
        # from the frame obtained before the column was added.
        score_column_added = list(ratings_w_added_column['score_duplicate'])
        expected = [float(row['stars']) for row in self.raw_source_content]

        self.assertEqual(expected, score_column_added)
Exemplo n.º 2
0
    def test_import_ratings_by_index(self):
        """Import ratings selecting the columns by positional index.

        Verifies the column names of the resulting frame, the Python type of
        the values in each column (str ids, float scores) and that the values
        match the 'user_id', 'item_id' and 'stars' fields of the raw content.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
        )
        frame = importer.import_ratings()

        self.assertEqual(['from_id', 'to_id', 'score'], list(frame.columns))

        actual_from = list(frame['from_id'])
        actual_to = list(frame['to_id'])
        actual_scores = list(frame['score'])

        wanted_from = [row['user_id'] for row in self.raw_source_content]
        wanted_to = [row['item_id'] for row in self.raw_source_content]
        wanted_scores = [
            float(row['stars']) for row in self.raw_source_content
        ]

        # Type checks first, in the same column order as the frame.
        typed_columns = (
            (actual_from, str),
            (actual_to, str),
            (actual_scores, float),
        )
        for values, wanted_type in typed_columns:
            self.assertTrue(
                all(isinstance(value, wanted_type) for value in values))

        # Then the value-by-value comparison against the raw content.
        compared_columns = (
            (wanted_from, actual_from),
            (wanted_to, actual_to),
            (wanted_scores, actual_scores),
        )
        for wanted, actual in compared_columns:
            self.assertEqual(wanted, actual)
Exemplo n.º 3
0
    def test_add_score_column_w_score_processor(self):
        """Check add_score_column when a score processor is supplied.

        The 'review_title' field is added as 'text_polarity' using
        TextBlobSentimentAnalysis as processor; the test verifies the column
        layout and that every produced score lies in the closed range
        [-1, 1].
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4)

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        ratings_w_added_column = ri.add_score_column(
            'review_title',
            column_name='text_polarity',
            score_processor=TextBlobSentimentAnalysis())

        expected_columns = ['from_id', 'to_id', 'score', 'text_polarity']
        result_columns = list(ratings_w_added_column.columns)

        self.assertEqual(expected_columns, result_columns)

        # Read the new column from the frame returned by add_score_column,
        # i.e. the frame whose column layout was just verified.
        score_column_added = list(ratings_w_added_column['text_polarity'])

        # all() is required: a bare generator expression is always truthy,
        # so without it the range check would pass regardless of the values.
        self.assertTrue(
            all(-1 <= score <= 1 for score in score_column_added))
Exemplo n.º 4
0
    def test_exception_add_score_column(self):
        """add_score_column must raise ValueError for 'review_title'.

        Per the original note, this covers a score column that can't be
        converted into float.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column='user_id',
            to_id_column='item_id',
            score_column='stars',
        )

        self.assertRaises(
            ValueError, importer.add_score_column, 'review_title', 'text')
Exemplo n.º 5
0
    def test_import_ratings_w_score_processor(self):
        """Import ratings with a NumberNormalizer applied to the score column.

        Verifies the column layout of the imported frame and that every
        score lies in the closed range [-1, 1].
        """
        ri = RatingsImporter(source=self.raw_source,
                             from_id_column=0,
                             to_id_column=1,
                             score_column=4,
                             score_processor=NumberNormalizer())

        ratings = ri.import_ratings()

        expected_columns = ['from_id', 'to_id', 'score']
        result_columns = list(ratings.columns)

        self.assertEqual(expected_columns, result_columns)

        score_result = list(ratings['score'])

        # all() is required: a bare generator expression is always truthy,
        # so without it the range check would pass regardless of the values.
        self.assertTrue(all(-1 <= score <= 1 for score in score_result))
Exemplo n.º 6
0
    def test_import_ratings_w_timestamp_index(self):
        """Import ratings with a timestamp column selected by index.

        Verifies that a 'timestamp' column is appended to the frame, that its
        values are strings, and that they match the 'timestamp' field of the
        raw content.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
            timestamp_column=5,
        )
        frame = importer.import_ratings()

        self.assertEqual(['from_id', 'to_id', 'score', 'timestamp'],
                         list(frame.columns))

        actual_timestamps = list(frame['timestamp'])
        wanted_timestamps = [
            row['timestamp'] for row in self.raw_source_content
        ]

        every_value_is_str = all(
            isinstance(value, str) for value in actual_timestamps)
        self.assertTrue(every_value_is_str)
        self.assertEqual(wanted_timestamps, actual_timestamps)
Exemplo n.º 7
0
 # Runs a ratings import over a JSON source with three rating field configs
 # ('review_title', 'text' and 'stars').
 # NOTE(review): `file_path` is not defined in the visible part of this
 # method - confirm where it comes from.
 def test_import_ratings(self):
     # The importer is built and run in a single expression; the value
     # returned by import_ratings() is discarded and nothing is asserted.
     RatingsImporter(
         source=JSONFile(file_path=file_path),
         output_directory="test_ratings",
         rating_configs=[
             RatingsFieldConfig(field_name="review_title",
                                processor=TextBlobSentimentAnalysis()),
             RatingsFieldConfig(field_name="text",
                                processor=TextBlobSentimentAnalysis()),
             RatingsFieldConfig(field_name="stars",
                                processor=NumberNormalizer(min_=0, max_=5))
         ],
         from_field_name="user_id",
         to_field_name="item_id",
         timestamp_field_name="timestamp").import_ratings()
     """
Exemplo n.º 8
0
def rating_config_run(config_dict: Dict):
    """Build a RatingsImporter from a configuration dictionary and run it.

    Args:
        config_dict: configuration read through the following keys:
            "fields" (iterable of dicts, each with a "field_name" and a
            "processor" dict holding a 'class' entry plus the remaining
            processor settings), "source_type", "raw_source_path",
            "output_directory", "from_field_name", "to_field_name" and
            "timestamp_field_name".

    Raises:
        KeyError: if one of the keys listed above is missing, or if a
            'class' / "source_type" value is not registered in
            runnable_instances.
    """
    rating_configs = []
    for field in config_dict["fields"]:
        # Work on a shallow copy so that removing 'class' does not mutate
        # the caller's configuration (which would make a second run on the
        # same dictionary fail).
        processor_settings = dict(field["processor"])
        class_name = processor_settings.pop('class')
        class_dict = dict_detector(processor_settings)
        rating_configs.append(
            RatingsFieldConfig(
                field_name=field["field_name"],
                processor=runnable_instances[class_name](**class_dict)))

    # Extra keyword arguments for the source constructor. Defined outside
    # the loop so it exists even when "fields" is empty; no source type
    # (including 'sql') currently contributes any extra argument.
    args = {}

    RatingsImporter(source=runnable_instances[config_dict["source_type"]](
        file_path=config_dict["raw_source_path"], **args),
                    output_directory=config_dict["output_directory"],
                    rating_configs=rating_configs,
                    from_field_name=config_dict["from_field_name"],
                    to_field_name=config_dict["to_field_name"],
                    timestamp_field_name=config_dict["timestamp_field_name"]
                    ).import_ratings()
    def test_import_ratings(self):
        """Run a full ratings import over the JSON test dataset.

        The dataset is looked up at a path three directories up; when that
        file is not found the path 'datasets/test_import_ratings.json' is
        used instead. The chosen path is printed and the import is executed;
        no assertion is made on its result.
        """
        dataset_path = '../../../datasets/test_import_ratings.json'
        try:
            probe = open(dataset_path)
        except FileNotFoundError:
            dataset_path = 'datasets/test_import_ratings.json'
        else:
            probe.close()

        print(dataset_path)

        source = JSONFile(file_path=dataset_path)

        # Two text fields scored by sentiment analysis, then the numeric one.
        field_configs = [
            RatingsFieldConfig(preference_field_name=text_field,
                               processor=TextBlobSentimentAnalysis())
            for text_field in ("review_title", "text")
        ]
        field_configs.append(
            RatingsFieldConfig(preference_field_name="stars",
                               processor=NumberNormalizer(min_=0, max_=5)))

        importer = RatingsImporter(
            source=source,
            output_directory="test_ratings",
            rating_configs=field_configs,
            from_field_name="user_id",
            to_field_name="item_id",
            timestamp_field_name="timestamp",
        )
        importer.import_ratings()
Exemplo n.º 10
0
# Example only, not present in the dataset
"""
title_review_config = RatingsFieldConfig(
    field_name='review_title',
    processor=TextBlobSentimentAnalysis()
)
"""

# Rating configuration for the 'points' field, processed by a
# NumberNormalizer built with min_=1 and max_=5.
points_review_config = RatingsFieldConfig(field_name='points',
                                          processor=NumberNormalizer(min_=1,
                                                                     max_=5))

# Importer reading the ratings from a CSV source.
# NOTE(review): `ratings_filename` is not defined in the visible part of this
# script - confirm where it is set.
ratings_importer = RatingsImporter(
    source=CSVFile(ratings_filename),  # change this
    rating_configs=[points_review_config],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
)

# Run the import and print the returned ratings frame.
ratings_frame = ratings_importer.import_ratings()
print(ratings_frame)

# Recommender configuration: item field 'Plot', field representation '0',
# classifier 'random_forest'.
tfidf_classifier_config = ClassifierRecommender(item_field='Plot',
                                                field_representation='0',
                                                classifier='random_forest')

classifier_recsys_config = RecSysConfig(
    users_directory=users_ca_dir,
    items_directory=items_ca_dir,
    ranking_algorithm=tfidf_classifier_config,
Exemplo n.º 11
0
    def test_exception_import_ratings(self):
        """import_ratings must raise for invalid column specifications.

        Three cases are checked in order:
        - a from-id column name not present in the raw source (KeyError);
        - a from-id column index not present in the raw source (IndexError);
        - a score column that can't be converted into float (ValueError).
        """
        failing_cases = (
            ('not_existent', 'stars', KeyError),
            (99, 'stars', IndexError),
            ('user_id', 'review_title', ValueError),
        )

        for from_column, score_column, expected_error in failing_cases:
            importer = RatingsImporter(
                source=self.raw_source,
                from_id_column=from_column,
                to_id_column='item_id',
                score_column=score_column,
            )

            self.assertRaises(expected_error, importer.import_ratings)
Exemplo n.º 12
0
    def test_ratings_to_csv(self):
        """Save the imported ratings to CSV under several naming scenarios.

        Covers the default file name, the numbered names used for the first
        and second duplicate, saving with overwrite=True (no third numbered
        file must appear) and saving with a custom file name.
        """
        importer = RatingsImporter(
            source=self.raw_source,
            from_id_column=0,
            to_id_column=1,
            score_column=4,
        )
        importer.import_ratings()

        # Plain save, then first and second duplicate: each call is expected
        # to produce the next file in this sequence.
        successive_files = (
            'csv_test/ratings_frame.csv',
            'csv_test/ratings_frame (1).csv',
            'csv_test/ratings_frame (2).csv',
        )
        for produced_file in successive_files:
            importer.imported_ratings_to_csv('csv_test/')
            self.assertTrue(os.path.isfile(produced_file))

        # Overwriting must reuse the base name instead of adding a "(3)" file.
        importer.imported_ratings_to_csv('csv_test/', overwrite=True)
        self.assertTrue(os.path.isfile('csv_test/ratings_frame.csv'))
        self.assertFalse(os.path.isfile('csv_test/ratings_frame (3).csv'))

        # Custom file name.
        importer.imported_ratings_to_csv('csv_test/', 'ratings_custom_name')
        self.assertTrue(os.path.isfile('csv_test/ratings_custom_name.csv'))
Exemplo n.º 13
0
from orange_cb_recsys.evaluation import RankingAlgEvalModel, NDCG, FNMeasure, KFoldPartitioning, GiniIndex, DeltaGap, \
    ReportEvalModel
from orange_cb_recsys.recsys import ClassifierRecommender, RecSysConfig, RecSys

# Input locations, expressed relative to the directory the script runs from.
ratings_filename = '../../../datasets/ratings_example.json'
items_ca_dir = '../../../orange_cb_recsys/movie_dir1605298315.4501655'
users_ca_dir = '../../../datasets/examples/users_dir'

# Rating configuration for the 'stars' field, processed by a
# NumberNormalizer built with min_=1 and max_=5.
stars_review_config = RatingsFieldConfig(field_name='stars',
                                         processor=NumberNormalizer(min_=1,
                                                                    max_=5))

# Importer reading the ratings from the JSON file defined above.
ratings_importer = RatingsImporter(
    source=JSONFile(ratings_filename),
    rating_configs=[stars_review_config],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
)

# Run the import and print the returned ratings frame.
ratings_frame = ratings_importer.import_ratings()
print(ratings_frame)

# Recommender configuration: item field 'Plot', field representation '0',
# classifier 'random_forest'.
original_classifier_config = ClassifierRecommender(item_field='Plot',
                                                   field_representation='0',
                                                   classifier='random_forest')

classifier_recsys_config = RecSysConfig(
    users_directory=users_ca_dir,
    items_directory=items_ca_dir,
    ranking_algorithm=original_classifier_config,