Exemplo n.º 1
0
 def test_import_ratings(self):
     # Smoke test: run a ratings import over a JSON source and make sure it
     # completes. No assertion is made on the imported data.
     # NOTE(review): `file_path` is not bound inside this method; it is
     # presumably provided by an enclosing scope - confirm.
     json_source = JSONFile(file_path=file_path)

     # The two free-text fields share the same sentiment processor type;
     # each field still gets its own processor instance.
     field_configs = [
         RatingsFieldConfig(field_name=text_field,
                            processor=TextBlobSentimentAnalysis())
         for text_field in ("review_title", "text")
     ]
     # The numeric "stars" field goes through a 0-5 NumberNormalizer.
     field_configs.append(
         RatingsFieldConfig(field_name="stars",
                            processor=NumberNormalizer(min_=0, max_=5)))

     importer = RatingsImporter(source=json_source,
                                output_directory="test_ratings",
                                rating_configs=field_configs,
                                from_field_name="user_id",
                                to_field_name="item_id",
                                timestamp_field_name="timestamp")
     importer.import_ratings()
     """
Exemplo n.º 2
0
    def test_graph_creation(self):
        """Build NXFullGraph in several configurations and check its nodes.

        Three constructions are exercised: explicit exogenous property
        names, an explicit exogenous representation id, and the ratings
        frame alone. The checks are deliberately shallow: they only verify
        that the expected node collections are (non-)empty.
        """

        def check_nodes(graph, with_properties):
            # User and item nodes must always exist; property nodes only
            # when exogenous information was requested.
            self.assertGreater(len(graph.user_nodes), 0)
            self.assertGreater(len(graph.item_nodes), 0)
            if with_properties:
                self.assertGreater(len(graph.property_nodes), 0)
            else:
                self.assertEqual(len(graph.property_nodes), 0)

        # Load the ratings that every graph below is built from.
        importer = RatingsImporter(
            source=CSVFile(ratings_filename),
            rating_configs=[
                RatingsFieldConfig(
                    field_name='points',
                    processor=NumberNormalizer(min_=1, max_=5),
                ),
            ],
            from_field_name='user_id',
            to_field_name='item_id',
            timestamp_field_name='timestamp',
        )
        ratings_frame = importer.import_ratings()

        # Case 1: property names given, representation left unset. The
        # properties 'producer' and 'starring' are taken from all exogenous
        # representations, since several of them may contain the same
        # property.
        # '1' is the column of the users DAT that identifies the gender.
        graph = NXFullGraph(
            source_frame=ratings_frame,
            item_contents_dir=movies_dir,
            user_contents_dir=user_dir,
            item_exo_properties=['producer', 'starring'],
            user_exo_properties=['1'],
        )
        check_nodes(graph, with_properties=True)

        # Case 2: representation given, property names left unset, so all
        # exogenous properties of representation "0" are retrieved.
        graph = NXFullGraph(
            source_frame=ratings_frame,
            item_contents_dir=movies_dir,
            user_contents_dir=user_dir,
            item_exo_representation="0",
            user_exo_representation="0",
        )
        check_nodes(graph, with_properties=True)

        # Case 3: ratings only, so no property node is expected.
        graph = NXFullGraph(ratings_frame)
        check_nodes(graph, with_properties=False)
Exemplo n.º 3
0
    def test_graph_creation(self):
        """Build NXTripartiteGraph in several configurations and check nodes.

        The graph is created with (a) an exogenous representation and a
        property name, (b) only the representation, (c) only the property
        name, and (d) the ratings frame alone. The checks are shallow: they
        only verify that the expected node collections are (non-)empty.
        """
        # Load the ratings that every graph below is built from.
        importer = RatingsImporter(
            source=CSVFile(ratings_filename),
            rating_configs=[
                RatingsFieldConfig(
                    field_name='points',
                    processor=NumberNormalizer(min_=1, max_=5),
                ),
            ],
            from_field_name='user_id',
            to_field_name='item_id',
            timestamp_field_name='timestamp',
        )
        ratings_frame = importer.import_ratings()

        # Keyword sets that must all yield a graph with property nodes.
        exo_variants = (
            # property 'starring' taken from representation "0"
            {'item_exo_representation': "0",
             'item_exo_properties': ['starring']},
            # only the exogenous representation
            {'item_exo_representation': "0"},
            # only the exogenous property name
            {'item_exo_properties': ['starring']},
        )
        for exo_kwargs in exo_variants:
            graph = NXTripartiteGraph(ratings_frame, movies_dir, **exo_kwargs)

            # Shallow sanity check that the graph was populated.
            self.assertGreater(len(graph.user_nodes), 0)
            self.assertGreater(len(graph.item_nodes), 0)
            self.assertGreater(len(graph.property_nodes), 0)

        # Ratings only: users and items exist, but no property node does.
        graph = NXTripartiteGraph(ratings_frame)
        self.assertGreater(len(graph.user_nodes), 0)
        self.assertGreater(len(graph.item_nodes), 0)
        self.assertEqual(len(graph.property_nodes), 0)
    def test_import_ratings(self):
        """Smoke-test a ratings import from the JSON test dataset.

        The dataset is looked up at a path three directories up first and,
        if that file does not exist, at a path relative to the working
        directory. No assertion is made on the imported data.
        """
        file_path = '../../../datasets/test_import_ratings.json'
        try:
            # Opening the file is only a probe for its existence.
            open(file_path).close()
        except FileNotFoundError:
            file_path = 'datasets/test_import_ratings.json'

        print(file_path)

        json_source = JSONFile(file_path=file_path)

        # Free-text fields are scored by sentiment; each field gets its own
        # processor instance.
        field_configs = [
            RatingsFieldConfig(preference_field_name=text_field,
                               processor=TextBlobSentimentAnalysis())
            for text_field in ("review_title", "text")
        ]
        # The numeric "stars" field goes through a 0-5 NumberNormalizer.
        field_configs.append(
            RatingsFieldConfig(preference_field_name="stars",
                               processor=NumberNormalizer(min_=0, max_=5)))

        importer = RatingsImporter(source=json_source,
                                   output_directory="test_ratings",
                                   rating_configs=field_configs,
                                   from_field_name="user_id",
                                   to_field_name="item_id",
                                   timestamp_field_name="timestamp")
        importer.import_ratings()
Exemplo n.º 5
0
def rating_config_run(config_dict: Dict) -> None:
    """Build a RatingsImporter from a configuration dict and run the import.

    Args:
        config_dict: configuration with the following keys:
            "fields": iterable of dicts, each holding a "field_name" and a
                "processor" dict. The processor dict has a "class" entry
                (a key of ``runnable_instances``); its remaining entries
                are passed through ``dict_detector`` and used as keyword
                arguments for the processor constructor.
            "source_type": key of ``runnable_instances`` selecting the
                source class.
            "raw_source_path": passed to the source as ``file_path``.
            "output_directory", "from_field_name", "to_field_name",
            "timestamp_field_name": forwarded to ``RatingsImporter``.

    Raises:
        KeyError: if a required key is missing from ``config_dict``, from
            a field entry, or from ``runnable_instances``.
    """
    # Extra keyword arguments for the source constructor. This is bound
    # before the loop: it used to be assigned inside it, so an empty
    # "fields" list raised UnboundLocalError at the call below.
    # NOTE(review): the original carried an empty placeholder branch for
    # source_type == 'sql'; source-specific arguments would be added here.
    args = {}

    rating_configs = []
    for field in config_dict["fields"]:
        # Pop "class" from a shallow copy so the caller's configuration is
        # not stripped of it and the same dict can be run again.
        processor_conf = dict(field["processor"])
        class_name = processor_conf.pop('class')
        class_dict = dict_detector(processor_conf)
        rating_configs.append(
            RatingsFieldConfig(
                field_name=field["field_name"],
                processor=runnable_instances[class_name](**class_dict)))

    source_class = runnable_instances[config_dict["source_type"]]
    RatingsImporter(
        source=source_class(file_path=config_dict["raw_source_path"], **args),
        output_directory=config_dict["output_directory"],
        rating_configs=rating_configs,
        from_field_name=config_dict["from_field_name"],
        to_field_name=config_dict["to_field_name"],
        timestamp_field_name=config_dict["timestamp_field_name"],
    ).import_ratings()
Exemplo n.º 6
0
from orange_cb_recsys.recsys.ranking_algorithms.classifier import SVM
from orange_cb_recsys.recsys.config import RecSysConfig

# Fixture locations are derived from this file's own directory, so they do
# not depend on the working directory the tests are launched from.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
contents_path = os.path.join(THIS_DIR, "../../contents")
datasets_path = os.path.join(THIS_DIR, "../../datasets")
ratings_filename = os.path.join(datasets_path, "examples/new_ratings.csv")
# Directories of the example user and item contents.
users_dir = os.path.join(contents_path,
                         "examples/ex_1/users_1600355755.1935306")
items_dir = os.path.join(contents_path,
                         "examples/ex_1/movies_1600355972.49884")

# Ratings are imported once, at module import time, from the CSV above.
# The 'points' column is processed with NumberNormalizer(min_=1, max_=5).
t_ratings = RatingsImporter(
    source=CSVFile(ratings_filename),
    rating_configs=[
        RatingsFieldConfig(field_name='points',
                           processor=NumberNormalizer(min_=1, max_=5))
    ],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
).import_ratings()


class TestRankingEvalModel(TestCase):
    def test_fit(self):

        recsys_config = RecSysConfig(users_directory=users_dir,
                                     items_directory=items_dir,
                                     score_prediction_algorithm=None,
                                     ranking_algorithm=ClassifierRecommender(
                                         {'Plot': '0'}, SVM()),
Exemplo n.º 7
0
from orange_cb_recsys.content_analyzer.ratings_manager.rating_processor import NumberNormalizer
from orange_cb_recsys.content_analyzer.ratings_manager.ratings_importer import RatingsFieldConfig, RatingsImporter
from orange_cb_recsys.content_analyzer.ratings_manager.sentiment_analysis import TextBlobSentimentAnalysis
from orange_cb_recsys.content_analyzer.raw_information_source import JSONFile, CSVFile
from orange_cb_recsys.evaluation import RankingAlgEvalModel, NDCG, FNMeasure, KFoldPartitioning, GiniIndex, DeltaGap, \
    ReportEvalModel
from orange_cb_recsys.recsys import ClassifierRecommender, RecSysConfig, RecSys

# Input locations, written as relative paths (they are not anchored to this
# file's directory).
ratings_filename = '../../../datasets/ratings_example.json'
items_ca_dir = '../../../orange_cb_recsys/movie_dir1605298315.4501655'
users_ca_dir = '../../../datasets/examples/users_dir'

# The 'stars' field is processed with NumberNormalizer(min_=1, max_=5).
stars_review_config = RatingsFieldConfig(field_name='stars',
                                         processor=NumberNormalizer(min_=1,
                                                                    max_=5))

# Importer reading the JSON ratings file with the single 'stars' config.
ratings_importer = RatingsImporter(
    source=JSONFile(ratings_filename),
    rating_configs=[stars_review_config],
    from_field_name='user_id',
    to_field_name='item_id',
    timestamp_field_name='timestamp',
)

# Run the import and print the result for inspection.
ratings_frame = ratings_importer.import_ratings()
print(ratings_frame)

# Classifier-based recommender configured on representation '0' of the
# 'Plot' item field, with the 'random_forest' classifier.
original_classifier_config = ClassifierRecommender(item_field='Plot',
                                                   field_representation='0',
                                                   classifier='random_forest')
Exemplo n.º 8
0
    def test_fit(self):
        """Run RankingAlgEvalModel.fit() on a CentroidVector recommender.

        Ratings are imported from ``datasets/examples/new_ratings.csv`` and
        the evaluation is run with K-fold partitioning and a list of
        ranking and correlation metrics. A TypeError or ValueError raised
        while building or fitting the evaluation model is ignored, so the
        test only fails on other exceptions.

        A previous, disabled variant built the ratings synthetically: for
        six users it drew a random subset of the first ten ids of a fixed
        list of twenty item ids, attached a random score, and put the
        records in a DataFrame with columns 'from_id', 'to_id', 'score'.
        """
        ratings_filename = 'datasets/examples/new_ratings.csv'
        ratings_source = CSVFile(ratings_filename)
        points_config = RatingsFieldConfig(
            field_name='points',
            processor=NumberNormalizer(min_=1, max_=5),
        )
        importer = RatingsImporter(
            source=ratings_source,
            rating_configs=[points_config],
            from_field_name='user_id',
            to_field_name='item_id',
            timestamp_field_name='timestamp',
        )
        t_ratings = importer.import_ratings()
        print(t_ratings)

        # Recommender under evaluation: centroid vector over representation
        # '1' of the 'Plot' field, compared with cosine similarity.
        ranker = CentroidVector(item_field='Plot',
                                field_representation='1',
                                similarity=CosineSimilarity())
        recsys_config = RecSysConfig(
            users_directory='contents/examples/ex_1/users_1600355755.1935306',
            items_directory='contents/examples/ex_1/movies_1600355972.49884',
            score_prediction_algorithm=None,
            ranking_algorithm=ranker,
            rating_frame=t_ratings)

        try:
            # Partitioning and metrics are built inside the try block so
            # that errors raised by their constructors are ignored as well.
            partitioning = KFoldPartitioning()
            metrics = [
                Precision(0.4),
                Recall(0.4),
                FNMeasure(1, 0.4),
                MRR(0.4),
                NDCG({0: (-1, 0), 1: (0, 1)}),
            ]
            metrics += [
                Correlation(method)
                for method in ('pearson', 'kendall', 'spearman')
            ]
            eval_model = RankingAlgEvalModel(config=recsys_config,
                                             partitioning=partitioning,
                                             metric_list=metrics)
            eval_model.fit()
        except (TypeError, ValueError):
            pass