Пример #1
0
    def test_compute_play_duration(self):
        df = pd.DataFrame.from_dict({
            'Event Start Timestamp':
            ['2016-12-02T07:22:34.766Z', '', '2016-10-27T09:45:31.817Z'],
            'Event End Timestamp': [
                '2016-12-02T07:25:34.766Z', '2019-06-19T15:51:09.477Z',
                '2016-10-27T09:47:36.482Z'
            ],
            'Play Duration Milliseconds': [123, 5342, 60000],
            'Media Duration In Milliseconds': [123, 120000, 120000],
            'Played completely': [True, True, False]
        })

        shape_input_df = df.shape
        activity_start = pd.to_datetime(df['Event Start Timestamp'])
        activity_end = pd.to_datetime(df['Event End Timestamp'])
        played_completely = df['Played completely']
        play_duration = df['Play Duration Milliseconds']
        media_duration = df['Media Duration In Milliseconds']
        Parser.compute_play_duration(df, activity_start, activity_end,
                                     played_completely, play_duration,
                                     media_duration)

        self.assertTrue(isinstance(df, pd.DataFrame))
        self.assertEqual(df.shape[0], shape_input_df[0])
        self.assertEqual(df.shape[1], shape_input_df[1] + 1)
        self.assertEqual(int(df.iloc[0, 5]), 3)
        self.assertEqual(df.iloc[1, 5], 2)
        self.assertEqual(df.iloc[2, 5], 1)
Пример #2
0
 def test_parse_library_tracks_infos_df(self):
     library_tracks_df = self.input_df['library_tracks_df']
     shape_input_df = library_tracks_df.shape
     result = Parser.parse_library_tracks_infos_df(library_tracks_df)
     self.assertTrue(isinstance(result, pd.DataFrame))
     self.assertEqual(result.shape[0], shape_input_df[0])
     self.assertEqual(result.shape[1], shape_input_df[1] - 34)
 def setUpClass(cls):
     #we use the test df
     target_files = {
         'identifier_infos_path' : 'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path' : 'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path': 'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path' : 'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path': 'test_df/Apple Music Activity/Apple Music Play Activity.csv'
     }
     cls.input_df = Utility.get_df_from_archive('apple_music_analyser/tests/test_df.zip', target_files)
     cls.parser = Parser(cls.input_df)
     cls.likes_dislikes_df = cls.parser.likes_dislikes_df
     cls.play_activity_df = cls.parser.play_activity_df
     cls.identifier_infos_df = cls.parser.identifier_infos_df
     cls.library_tracks_df = cls.parser.library_tracks_df
     cls.library_activity_df = cls.parser.library_activity_df
     #we process the df
     cls.process = ProcessTracks()
     cls.process.process_library_tracks_df(cls.library_tracks_df)
     cls.process.process_identifier_df(cls.identifier_infos_df)
     cls.process.process_play_df(cls.play_activity_df)
     cls.process.process_likes_dislikes_df(cls.likes_dislikes_df)
     #we extract the useful objects from the process instance
     cls.track_instance_dict = cls.process.track_instance_dict
     cls.artist_tracks_titles = cls.process.artist_tracks_titles
     cls.genres_list = cls.process.genres_list
     cls.items_not_matched = cls.process.items_not_matched
Пример #4
0
 def test_parse_likes_dislikes_df(self):
     likes_dislikes_df = self.input_df['likes_dislikes_df']
     shape_input_df = likes_dislikes_df.shape
     result = Parser.parse_likes_dislikes_df(likes_dislikes_df)
     self.assertTrue(isinstance(result, pd.DataFrame))
     self.assertEqual(result.shape[0], shape_input_df[0])
     self.assertEqual(result.shape[1], shape_input_df[1] + 2)
     self.assertIn('Title', result.columns)
     self.assertIn('Artist', result.columns)
Пример #5
0
    def test_set_partial_listening(self):
        df = pd.DataFrame.from_dict({
            'End Reason Type':
            ['NATURAL_END_OF_TRACK', 'SCRUB_END', 'FAILED_TO_LOAD'],
            'Play Duration Milliseconds': [111, 22222, 1234],
            'Media Duration In Milliseconds': [444, 3, 12345]
        })
        shape_input_df = df.shape
        Parser.set_partial_listening(df, df['End Reason Type'],
                                     df['Play Duration Milliseconds'],
                                     df['Media Duration In Milliseconds'])

        self.assertTrue(isinstance(df, pd.DataFrame))
        self.assertEqual(df.shape[0], shape_input_df[0])
        self.assertEqual(df.shape[1], shape_input_df[1] + 1)
        self.assertIn('Played completely', df.columns)
        self.assertEqual(df.iloc[0, 3], True)
        self.assertEqual(df.iloc[1, 3], True)
        self.assertEqual(df.iloc[2, 3], False)
Пример #6
0
 def test_parse_input_df(self):
     result = Parser.parse_input_df(self.input_df)
     # we expect a dictionary of dataframes
     self.assertTrue(isinstance(result, dict))
     self.assertEqual(len(result), 5)
     self.assertEqual(list(result.keys()), [
         'likes_dislikes_df', 'play_activity_df', 'identifier_infos_df',
         'library_tracks_df', 'library_activity_df'
     ])
     for i in range(len(list(result.values()))):
         self.assertTrue(isinstance(list(result.values())[i], pd.DataFrame))
Пример #7
0
    def test_remove_play_duration_outliers(self):
        df = pd.DataFrame.from_dict({
            'Play duration in minutes': [1, 4, 6, 92, 999],
            'Media Duration In Milliseconds': [123, 345, 678, 720000, 120000],
        })

        shape_input_df = df.shape
        duration_minutes = df['Play duration in minutes']
        media_duration = df['Media Duration In Milliseconds']
        max_duration = 90
        Parser.remove_play_duration_outliers(df, duration_minutes,
                                             media_duration, max_duration)
        self.assertTrue(isinstance(df, pd.DataFrame))
        self.assertEqual(df.shape[0], shape_input_df[0])
        self.assertEqual(df.shape[1], shape_input_df[1])
        self.assertEqual(int(df.iloc[0, 0]), 1)
        self.assertEqual(df.iloc[1, 0], 4)
        self.assertEqual(df.iloc[2, 0], 6)
        self.assertEqual(df.iloc[3, 0], 12)
        self.assertEqual(df.iloc[4, 0], 2)
Пример #8
0
 def __init__(self, input_df):
     self.input_df = input_df
     self.parser = Parser(input_df)
     self.source_dataframes = self.parser.source_dataframes
     self.likes_dislikes_df = None
     self.play_activity_df = None
     self.identifier_infos_df = None
     self.library_tracks_df = None
     self.library_activity_df = None
     self.get_df_from_source()
     self.process_tracks = ProcessTracks()
     self.process_tracks_in_df()
     self.track_summary_objects = TrackSummaryObject(self.process_tracks.track_instance_dict, self.process_tracks.artist_tracks_titles, self.process_tracks.genres_list, self.process_tracks.items_not_matched)
     self.df_visualization = self.build_df_visualisation()
Пример #9
0
 def test_parse_library_activity_df(self):
     library_activity_df = self.input_df['library_activity_df']
     shape_input_df = library_activity_df.shape
     result = Parser.parse_library_activity_df(library_activity_df)
     self.assertTrue(isinstance(result, pd.DataFrame))
     self.assertEqual(result.shape[0], shape_input_df[0])
     self.assertEqual(result.shape[1], shape_input_df[1] + 8)
     self.assertIn('Transaction date time', result.columns)
     self.assertIn('Transaction Year', result.columns)
     self.assertIn('Transaction Month', result.columns)
     self.assertIn('Transaction DOM', result.columns)
     self.assertIn('Transaction DOW', result.columns)
     self.assertIn('Transaction HOD', result.columns)
     self.assertIn('Transaction HOD', result.columns)
     self.assertIn('Transaction Agent', result.columns)
     self.assertIn('Transaction Agent Model', result.columns)
 def setUp(self):
     target_files = {
         'identifier_infos_path' : 'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path' : 'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path': 'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path' : 'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path': 'test_df/Apple Music Activity/Apple Music Play Activity.csv'
     }
     self.input_df = Utility.get_df_from_archive('apple_music_analyser/tests/test_df.zip', target_files)
     self.parser = Parser(self.input_df)
     self.likes_dislikes_df = self.parser.likes_dislikes_df
     self.play_activity_df = self.parser.play_activity_df
     self.identifier_infos_df = self.parser.identifier_infos_df
     self.library_tracks_df = self.parser.library_tracks_df
     self.library_activity_df = self.parser.library_activity_df
     self.process = ProcessTracks()
     self.track_instance = Track(self.process.increment)
Пример #11
0
 def test_parse_play_activity_df(self):
     play_activity_df = self.input_df['play_activity_df']
     shape_input_df = play_activity_df.shape
     result = Parser.parse_play_activity_df(play_activity_df)
     self.assertTrue(isinstance(result, pd.DataFrame))
     #we expect 1 row with date before 2015 to be dropped
     self.assertEqual(result.shape[0], shape_input_df[0] - 1)
     # 24 columns are dropped, and 10 added (those tested below)
     self.assertEqual(result.shape[1], shape_input_df[1] - 14)
     self.assertIn('Play date time', result.columns)
     self.assertIn('Play Year', result.columns)
     self.assertIn('Play Month', result.columns)
     self.assertIn('Play DOM', result.columns)
     self.assertIn('Play DOW', result.columns)
     self.assertIn('Play HOD', result.columns)
     self.assertIn('Play HOD', result.columns)
     self.assertIn('Played completely', result.columns)
     self.assertIn('Track origin', result.columns)
     self.assertIn('Play duration in minutes', result.columns)
Пример #12
0
 def test_init_Parser(self):
     target_files = {
         'identifier_infos_path':
         'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path':
         'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path':
         'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path':
         'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path':
         'test_df/Apple Music Activity/Apple Music Play Activity.csv'
     }
     input_df = Utility.get_df_from_archive(
         'apple_music_analyser/tests/test_df.zip', target_files)
     shape_input_likes_dislikes_df = input_df['likes_dislikes_df'].shape
     shape_input_play_activity_df = input_df['play_activity_df'].shape
     shape_input_identifier_infos_df = input_df['identifier_infos_df'].shape
     shape_input_library_tracks_df = input_df['library_tracks_df'].shape
     shape_input_library_activity_df = input_df['library_activity_df'].shape
     result = Parser(input_df)
     self.assertTrue(isinstance(result.likes_dislikes_df, pd.DataFrame))
     self.assertEqual(result.likes_dislikes_df.shape,
                      (shape_input_likes_dislikes_df[0],
                       shape_input_likes_dislikes_df[1] + 2))
     self.assertTrue(isinstance(result.play_activity_df, pd.DataFrame))
     self.assertEqual(result.play_activity_df.shape,
                      (shape_input_play_activity_df[0] - 1,
                       shape_input_play_activity_df[1] - 14))
     self.assertTrue(isinstance(result.identifier_infos_df, pd.DataFrame))
     self.assertEqual(result.identifier_infos_df.shape,
                      (shape_input_identifier_infos_df[0],
                       shape_input_identifier_infos_df[1]))
     self.assertTrue(isinstance(result.library_tracks_df, pd.DataFrame))
     self.assertEqual(result.library_tracks_df.shape,
                      (shape_input_library_tracks_df[0],
                       shape_input_library_tracks_df[1] - 34))
     self.assertTrue(isinstance(result.library_activity_df, pd.DataFrame))
     self.assertEqual(result.library_activity_df.shape,
                      (shape_input_library_activity_df[0],
                       shape_input_library_activity_df[1] + 8))
Пример #13
0
 def test_parse_source_dataframes_bad_input(self):
     parser = Parser(self.input_df)
     parser.source_dataframes = {}
     self.assertRaises(Exception, parser.parse_source_dataframes)