def upload_test_mapped_listens_to_HDFS(cls):
    """Build the mapped-listens dataframe and save it as parquet.

    Listens are fetched through
    ``create_dataframes.get_listens_for_training_model_window`` with
    ``cls.date`` used as both window bounds, an empty dict as the third
    argument and ``cls.listens_path`` as the source path. They are then
    combined with the mapping read from ``cls.mapping_path`` and the
    result is written to ``cls.mapped_listens_path``.
    """
    listens = create_dataframes.get_listens_for_training_model_window(
        cls.date, cls.date, {}, cls.listens_path
    )
    # The mapping is read only after the listens have been fetched.
    mapped = create_dataframes.get_mapped_artist_and_recording_mbids(
        listens, utils.read_files_from_HDFS(cls.mapping_path)
    )
    utils.save_parquet(mapped, cls.mapped_listens_path)
def upload_test_mapped_listens_to_HDFS(cls):
    """Build the mapped-listens dataframe and save it as parquet.

    Listens are read from ``LISTENS_PATH`` with ``cls.date`` used as both
    window bounds, combined with the mapping read from ``MAPPING_PATH``,
    and the result is written to ``MAPPED_LISTENS_PATH``.
    """
    listens = create_dataframes.get_listens_for_training_model_window(
        cls.date, cls.date, {}, LISTENS_PATH
    )
    mapping = utils.read_files_from_HDFS(MAPPING_PATH)

    utils.save_parquet(
        create_dataframes.get_mapped_artist_and_recording_mbids(listens, mapping),
        MAPPED_LISTENS_PATH,
    )
def test_get_mapped_artist_and_recording_mbids(self):
    """Check row count and columns of the mapped listens dataframe.

    The result of
    ``create_dataframes.get_mapped_artist_and_recording_mbids`` must
    contain exactly one row and, ignoring order, the same columns as the
    dataframe returned by ``self.get_mapped_listens()``. The test also
    asserts that ``path.MAPPED_LISTENS`` exists.
    """
    listens = create_dataframes.get_listens_for_training_model_window(
        self.date, self.date, {}, self.listens_path
    )
    mapping = utils.read_files_from_HDFS(self.mapping_path)
    result = create_dataframes.get_mapped_artist_and_recording_mbids(
        listens, mapping
    )

    self.assertEqual(result.count(), 1)

    # Column order is irrelevant here, so both sides are sorted first.
    expected_columns = sorted(self.get_mapped_listens().columns)
    actual_columns = sorted(result.columns)
    self.assertListEqual(expected_columns, actual_columns)

    self.assertTrue(utils.path_exists(path.MAPPED_LISTENS))
def test_get_mapped_artist_and_recording_mbids(self):
    """Check row count and exact column layout of the mapped dataframe.

    The result of
    ``create_dataframes.get_mapped_artist_and_recording_mbids`` must
    contain exactly one row and expose the expected columns in exactly
    the listed order. The test also asserts that ``path.MAPPED_LISTENS``
    exists.
    """
    listens = create_dataframes.get_listens_for_training_model_window(
        self.date, self.date, {}, LISTENS_PATH
    )
    result = create_dataframes.get_mapped_artist_and_recording_mbids(
        listens, utils.read_files_from_HDFS(MAPPING_PATH)
    )

    self.assertEqual(result.count(), 1)

    # NOTE: this comparison is order-sensitive; the list is not sorted
    # before being compared with the dataframe's columns.
    expected_columns = [
        'artist_msid',
        'artist_name',
        'listened_at',
        'recording_msid',
        'release_mbid',
        'release_msid',
        'release_name',
        'tags',
        'track_name',
        'user_name',
        'mb_artist_credit_id',
        'mb_artist_credit_mbids',
        'mb_recording_mbid',
        'mb_release_mbid',
        'msb_artist_msid',
        'msb_recording_msid',
        'msb_release_msid',
    ]
    self.assertListEqual(expected_columns, result.columns)

    self.assertTrue(utils.path_exists(path.MAPPED_LISTENS))
def test_get_mapped_artist_and_recording_mbids(self):
    """Check row count and column set of the mapped listens dataframe.

    The result of
    ``create_dataframes.get_mapped_artist_and_recording_mbids`` must
    contain eight rows and, ignoring order, exactly the expected columns.
    The test also asserts that ``path.MAPPED_LISTENS`` exists.
    """
    listens = create_dataframes.get_listens_for_training_model_window(
        self.date, self.date, {}, self.listens_path
    )
    mapping = utils.read_files_from_HDFS(self.mapping_path)
    result = create_dataframes.get_mapped_artist_and_recording_mbids(
        listens, mapping
    )

    self.assertEqual(result.count(), 8)

    expected_columns = [
        'listened_at',
        'mb_artist_credit_id',
        'mb_artist_credit_mbids',
        'mb_recording_mbid',
        'mb_release_mbid',
        'msb_artist_credit_name_matchable',
        'msb_recording_name_matchable',
        'user_name',
    ]
    # Both sides are sorted so that column order does not affect the check.
    self.assertListEqual(sorted(expected_columns), sorted(result.columns))

    self.assertTrue(utils.path_exists(path.MAPPED_LISTENS))