示例#1
0
    def test_compute_play_duration(self):
        """Check that ``Parser.compute_play_duration`` adds one column to ``df``.

        The fixture has three rows; the second one has an empty start
        timestamp. After the call the frame must keep its row count, gain
        exactly one column, and hold the expected values in the column at
        the first position past the original ones.
        """
        df = pd.DataFrame.from_dict({
            'Event Start Timestamp':
            ['2016-12-02T07:22:34.766Z', '', '2016-10-27T09:45:31.817Z'],
            'Event End Timestamp': [
                '2016-12-02T07:25:34.766Z', '2019-06-19T15:51:09.477Z',
                '2016-10-27T09:47:36.482Z'
            ],
            'Play Duration Milliseconds': [123, 5342, 60000],
            'Media Duration In Milliseconds': [123, 120000, 120000],
            'Played completely': [True, True, False]
        })

        # Record the shape before the call: df is checked afterwards for
        # the added column.
        rows, cols = df.shape
        activity_start = pd.to_datetime(df['Event Start Timestamp'])
        activity_end = pd.to_datetime(df['Event End Timestamp'])
        Parser.compute_play_duration(
            df,
            activity_start,
            activity_end,
            df['Played completely'],
            df['Play Duration Milliseconds'],
            df['Media Duration In Milliseconds'],
        )

        # assertIsInstance reports the actual type on failure, unlike
        # assertTrue(isinstance(...)).
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(df.shape, (rows, cols + 1))

        # Positional index of the first column past the original ones.
        new_col = cols
        self.assertEqual(int(df.iloc[0, new_col]), 3)
        self.assertEqual(df.iloc[1, new_col], 2)
        self.assertEqual(df.iloc[2, new_col], 1)
示例#2
0
 def test_parse_library_tracks_infos_df(self):
     """Check the shape of the parsed library tracks dataframe.

     The parsed frame must be a DataFrame with the same number of rows as
     the input and 34 fewer columns.
     """
     source_df = self.input_df['library_tracks_df']
     # Shape is recorded before parsing so the comparison uses the input
     # as it was handed to the parser.
     rows, cols = source_df.shape
     parsed = Parser.parse_library_tracks_infos_df(source_df)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(parsed, pd.DataFrame)
     self.assertEqual(parsed.shape, (rows, cols - 34))
 @classmethod
 def setUpClass(cls):
     """Load the test archive and run the track processing for the class.

     unittest invokes ``setUpClass`` on the class itself, so it has to be
     a classmethod; without the decorator the call fails because ``cls``
     is never supplied.

     The fixture stores on the class: the raw input dataframes, a
     ``Parser`` built from them together with its five parsed dataframes,
     a ``ProcessTracks`` instance fed with four of those dataframes, and
     the objects read back from that instance.
     """
     # we use the test df
     target_files = {
         'identifier_infos_path': 'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path': 'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path': 'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path': 'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path': 'test_df/Apple Music Activity/Apple Music Play Activity.csv',
     }
     cls.input_df = Utility.get_df_from_archive('apple_music_analyser/tests/test_df.zip', target_files)
     cls.parser = Parser(cls.input_df)
     cls.likes_dislikes_df = cls.parser.likes_dislikes_df
     cls.play_activity_df = cls.parser.play_activity_df
     cls.identifier_infos_df = cls.parser.identifier_infos_df
     cls.library_tracks_df = cls.parser.library_tracks_df
     cls.library_activity_df = cls.parser.library_activity_df
     # we process the df (the call order is kept exactly as written)
     cls.process = ProcessTracks()
     cls.process.process_library_tracks_df(cls.library_tracks_df)
     cls.process.process_identifier_df(cls.identifier_infos_df)
     cls.process.process_play_df(cls.play_activity_df)
     cls.process.process_likes_dislikes_df(cls.likes_dislikes_df)
     # we extract the useful objects from the process instance
     cls.track_instance_dict = cls.process.track_instance_dict
     cls.artist_tracks_titles = cls.process.artist_tracks_titles
     cls.genres_list = cls.process.genres_list
     cls.items_not_matched = cls.process.items_not_matched
示例#4
0
 def test_parse_likes_dislikes_df(self):
     """Check the shape and new columns of the parsed likes/dislikes frame.

     The parsed frame must be a DataFrame with the same number of rows as
     the input, two more columns, and must contain the 'Title' and
     'Artist' columns.
     """
     source_df = self.input_df['likes_dislikes_df']
     # Shape is recorded before parsing.
     rows, cols = source_df.shape
     parsed = Parser.parse_likes_dislikes_df(source_df)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(parsed, pd.DataFrame)
     self.assertEqual(parsed.shape, (rows, cols + 2))
     for column in ('Title', 'Artist'):
         self.assertIn(column, parsed.columns)
示例#5
0
    def test_set_partial_listening(self):
        """Check that ``Parser.set_partial_listening`` adds 'Played completely'.

        The three-row fixture must keep its row count, gain exactly one
        column named 'Played completely', and hold True, True, False in the
        column at the first position past the original ones.
        """
        df = pd.DataFrame.from_dict({
            'End Reason Type':
            ['NATURAL_END_OF_TRACK', 'SCRUB_END', 'FAILED_TO_LOAD'],
            'Play Duration Milliseconds': [111, 22222, 1234],
            'Media Duration In Milliseconds': [444, 3, 12345]
        })
        # Record the shape before the call: df is checked afterwards for
        # the added column.
        rows, cols = df.shape
        Parser.set_partial_listening(
            df,
            df['End Reason Type'],
            df['Play Duration Milliseconds'],
            df['Media Duration In Milliseconds'],
        )

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(df.shape, (rows, cols + 1))
        self.assertIn('Played completely', df.columns)

        # Positional index of the first column past the original ones.
        # assertEqual (not assertIs) is kept because the cells may hold
        # numpy booleans rather than Python bool objects.
        new_col = cols
        self.assertEqual(df.iloc[0, new_col], True)
        self.assertEqual(df.iloc[1, new_col], True)
        self.assertEqual(df.iloc[2, new_col], False)
示例#6
0
 def test_parse_input_df(self):
     """Check that ``Parser.parse_input_df`` returns five named dataframes.

     The result must be a dict with exactly the five expected keys, in
     the expected order, and every value must be a DataFrame.
     """
     result = Parser.parse_input_df(self.input_df)
     # we expect a dictionary of dataframes
     self.assertIsInstance(result, dict)
     self.assertEqual(len(result), 5)
     self.assertEqual(list(result), [
         'likes_dislikes_df', 'play_activity_df', 'identifier_infos_df',
         'library_tracks_df', 'library_activity_df'
     ])
     # Iterate the dict directly instead of rebuilding list(result.values())
     # on every index; the key is passed as msg so a failure names the
     # offending entry.
     for name, frame in result.items():
         self.assertIsInstance(frame, pd.DataFrame, msg=name)
示例#7
0
    def test_remove_play_duration_outliers(self):
        """Check ``Parser.remove_play_duration_outliers`` with a 90 limit.

        The five-row fixture must keep its shape. The first three values of
        the first column stay at 1, 4 and 6, while the last two (92 and 999
        in the input) are expected to become 12 and 2.
        """
        df = pd.DataFrame.from_dict({
            'Play duration in minutes': [1, 4, 6, 92, 999],
            'Media Duration In Milliseconds': [123, 345, 678, 720000, 120000],
        })

        # Record the shape before the call: df is checked afterwards.
        expected_shape = df.shape
        max_duration = 90
        Parser.remove_play_duration_outliers(
            df,
            df['Play duration in minutes'],
            df['Media Duration In Milliseconds'],
            max_duration,
        )

        # assertIsInstance reports the actual type on failure.
        self.assertIsInstance(df, pd.DataFrame)
        self.assertEqual(df.shape, expected_shape)
        # Only the first value is cast with int(), as in the original test.
        self.assertEqual(int(df.iloc[0, 0]), 1)
        self.assertEqual(df.iloc[1, 0], 4)
        self.assertEqual(df.iloc[2, 0], 6)
        self.assertEqual(df.iloc[3, 0], 12)
        self.assertEqual(df.iloc[4, 0], 2)
示例#8
0
 def __init__(self, input_df):
     """Parse ``input_df``, process its tracks and build the visualisation df.

     The steps run in a fixed order: build the ``Parser``, reset the five
     dataframe attributes, call ``get_df_from_source``, create a
     ``ProcessTracks`` and call ``process_tracks_in_df``, then build the
     ``TrackSummaryObject`` and the visualisation dataframe.
     """
     self.input_df = input_df
     self.parser = Parser(input_df)
     self.source_dataframes = self.parser.source_dataframes

     # Placeholders; presumably filled in by get_df_from_source(), which
     # is defined outside this view -- TODO confirm.
     self.likes_dislikes_df = None
     self.play_activity_df = None
     self.identifier_infos_df = None
     self.library_tracks_df = None
     self.library_activity_df = None
     self.get_df_from_source()

     self.process_tracks = ProcessTracks()
     self.process_tracks_in_df()

     processed = self.process_tracks
     self.track_summary_objects = TrackSummaryObject(
         processed.track_instance_dict,
         processed.artist_tracks_titles,
         processed.genres_list,
         processed.items_not_matched,
     )
     self.df_visualization = self.build_df_visualisation()
示例#9
0
 def test_parse_library_activity_df(self):
     """Check the shape and new columns of the parsed library activity frame.

     The parsed frame must be a DataFrame with the same number of rows as
     the input and eight more columns, all of which are checked by name.
     """
     source_df = self.input_df['library_activity_df']
     # Shape is recorded before parsing.
     rows, cols = source_df.shape
     parsed = Parser.parse_library_activity_df(source_df)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(parsed, pd.DataFrame)
     self.assertEqual(parsed.shape, (rows, cols + 8))
     # The eight added columns; the original asserted 'Transaction HOD'
     # twice, and the redundant check is dropped here.
     expected_columns = (
         'Transaction date time',
         'Transaction Year',
         'Transaction Month',
         'Transaction DOM',
         'Transaction DOW',
         'Transaction HOD',
         'Transaction Agent',
         'Transaction Agent Model',
     )
     for column in expected_columns:
         self.assertIn(column, parsed.columns)
 def setUp(self):
     """Rebuild the fixtures before each test.

     Loads the input dataframes from the test archive, parses them with
     ``Parser``, exposes the five parsed dataframes on the test instance,
     and creates a fresh ``ProcessTracks`` plus a ``Track`` built from its
     ``increment`` attribute.
     """
     archive_members = {
         'identifier_infos_path': 'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path': 'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path': 'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path': 'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path': 'test_df/Apple Music Activity/Apple Music Play Activity.csv',
     }
     self.input_df = Utility.get_df_from_archive(
         'apple_music_analyser/tests/test_df.zip', archive_members)

     parser = Parser(self.input_df)
     self.parser = parser
     self.likes_dislikes_df = parser.likes_dislikes_df
     self.play_activity_df = parser.play_activity_df
     self.identifier_infos_df = parser.identifier_infos_df
     self.library_tracks_df = parser.library_tracks_df
     self.library_activity_df = parser.library_activity_df

     process = ProcessTracks()
     self.process = process
     self.track_instance = Track(process.increment)
示例#11
0
 def test_parse_play_activity_df(self):
     """Check the shape and new columns of the parsed play activity frame.

     The parsed frame must be a DataFrame with one row fewer than the
     input and 14 fewer columns, and must contain the columns listed
     below.
     """
     source_df = self.input_df['play_activity_df']
     # Shape is recorded before parsing.
     rows, cols = source_df.shape
     parsed = Parser.parse_play_activity_df(source_df)
     # assertIsInstance reports the actual type on failure.
     self.assertIsInstance(parsed, pd.DataFrame)
     # we expect 1 row with date before 2015 to be dropped
     # 24 columns are dropped, and 10 added, hence the net -14
     self.assertEqual(parsed.shape, (rows - 1, cols - 14))
     # NOTE(review): the original comment says the 10 added columns are
     # all tested here, but it asserted 'Play HOD' twice, so only 9
     # distinct names are checked. The duplicate is removed; the missing
     # tenth column name should be confirmed against the parser and added.
     expected_columns = (
         'Play date time',
         'Play Year',
         'Play Month',
         'Play DOM',
         'Play DOW',
         'Play HOD',
         'Played completely',
         'Track origin',
         'Play duration in minutes',
     )
     for column in expected_columns:
         self.assertIn(column, parsed.columns)
示例#12
0
 def test_init_Parser(self):
     """Check the five dataframes exposed by a freshly built ``Parser``.

     Each parsed dataframe must be a DataFrame whose shape differs from
     the matching input dataframe by a fixed (rows, columns) offset.
     """
     target_files = {
         'identifier_infos_path':
         'test_df/Apple Music Activity/Identifier Information.json.zip',
         'library_tracks_path':
         'test_df/Apple Music Activity/Apple Music Library Tracks.json.zip',
         'library_activity_path':
         'test_df/Apple Music Activity/Apple Music Library Activity.json.zip',
         'likes_dislikes_path':
         'test_df/Apple Music Activity/Apple Music Likes and Dislikes.csv',
         'play_activity_path':
         'test_df/Apple Music Activity/Apple Music Play Activity.csv'
     }
     input_df = Utility.get_df_from_archive(
         'apple_music_analyser/tests/test_df.zip', target_files)

     # Expected (row offset, column offset) of each parsed dataframe
     # relative to the input dataframe stored under the same name.
     expected_offsets = {
         'likes_dislikes_df': (0, 2),
         'play_activity_df': (-1, -14),
         'identifier_infos_df': (0, 0),
         'library_tracks_df': (0, -34),
         'library_activity_df': (0, 8),
     }
     # Input shapes are captured before the Parser is built, so the
     # comparison does not depend on whether parsing touches the inputs.
     input_shapes = {
         name: input_df[name].shape for name in expected_offsets
     }

     result = Parser(input_df)

     for name, (row_offset, col_offset) in expected_offsets.items():
         rows, cols = input_shapes[name]
         parsed = getattr(result, name)
         # msg=name makes a failure say which dataframe is wrong.
         self.assertIsInstance(parsed, pd.DataFrame, msg=name)
         self.assertEqual(
             parsed.shape, (rows + row_offset, cols + col_offset), msg=name)
示例#13
0
 def test_parse_source_dataframes_bad_input(self):
     """Check that parsing raises once the source dataframes are emptied."""
     broken_parser = Parser(self.input_df)
     # Replace the source dataframes with an empty dict before parsing.
     broken_parser.source_dataframes = {}
     with self.assertRaises(Exception):
         broken_parser.parse_source_dataframes()