def test_filter_last_x_days_recordings(self):
    """Check the rows returned by candidate_sets.filter_last_x_days_recordings.

    Loads the mapped listens and the mapped listens subset via
    utils.read_files_from_HDFS, builds the top-artist candidate set with a
    top artist limit of 1, filters it against the listens subset and
    compares the resulting user names and recording MBIDs with the
    expected values.
    """
    # NOTE(review): another definition with this same name is visible right
    # after this one; if both live in the same class the later one shadows
    # this test and it never runs -- confirm which version is intended.
    mapped_listens_df = utils.read_files_from_HDFS(self.mapped_listens_path)
    # Loaded once: a second, identical read of the subset would only
    # overwrite this variable with the same data.
    mapped_listens_subset = utils.read_files_from_HDFS(self.mapped_listens_subset_path)

    recordings_df = create_dataframes.get_recordings_df(mapped_listens_df, {})
    users = create_dataframes.get_users_dataframe(mapped_listens_df, {})

    top_artist_limit = 1
    top_artist_df = candidate_sets.get_top_artists(mapped_listens_subset, top_artist_limit, [])
    _, candidate_set_df = candidate_sets.get_top_artist_candidate_set(
        top_artist_df, recordings_df, users, mapped_listens_subset
    )

    df = candidate_sets.filter_last_x_days_recordings(candidate_set_df, mapped_listens_subset)

    # Collect once and reuse the rows so both assertions inspect the same
    # result instead of evaluating the dataframe twice.
    rows = df.collect()

    user_name = [row.user_name for row in rows]
    self.assertEqual(sorted(user_name), ['rob', 'rob', 'vansika_1'])

    received_recording_mbid = sorted(row.mb_recording_mbid for row in rows)
    expected_recording_mbid = sorted([
        "sf5a56f4-1f83-4681-b319-70a734d0d047",
        "af5a56f4-1f83-4681-b319-70a734d0d047",
        "sf5a56f4-1f83-4681-b319-70a734d0d047",
    ])
    self.assertEqual(expected_recording_mbid, received_recording_mbid)
def test_filter_last_x_days_recordings(self):
    """Check the rows returned by candidate_sets.filter_last_x_days_recordings.

    Uses the dataframes already stored on the test instance
    (mapped_listens_df, recordings_df, users_df, mapped_listens_subset),
    builds the top-artist candidate set with a top artist limit of 1,
    filters it against the listens subset and compares the resulting user
    names and recording MBIDs with the expected values.
    """
    # NOTE(review): an earlier definition with this same name is visible just
    # before this one; if both live in the same class this one shadows it --
    # confirm the earlier version is meant to be dropped.
    top_artist_limit = 1
    top_artist_df = candidate_sets.get_top_artists(self.mapped_listens_df, top_artist_limit, [])
    _, candidate_set_df = candidate_sets.get_top_artist_candidate_set(
        top_artist_df, self.recordings_df, self.users_df, self.mapped_listens_subset
    )

    df = candidate_sets.filter_last_x_days_recordings(candidate_set_df, self.mapped_listens_subset)

    # Collect once and reuse the rows so both assertions inspect the same
    # result instead of evaluating the dataframe twice.
    rows = df.collect()

    user_name = [row.user_name for row in rows]
    self.assertEqual(sorted(user_name), ['rob', 'vansika_1'])

    received_recording_mbid = sorted(row.recording_mbid for row in rows)
    expected_recording_mbid = sorted(
        ["sf5a56f4-1f83-4681-b319-70a734d0d047", "sf5a56f4-1f83-4681-b319-70a734d0d047"]
    )
    self.assertEqual(expected_recording_mbid, received_recording_mbid)