def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list, video_csv_list, add_data=False):
    """Score the cloned classifier policy against the expert policy.

    Extracts features from the expert trajectory, predicts actions with the
    fitted classifier, rolls out the cloned policy on the given traces/videos,
    and delegates the comparison to ``score_comparison``.

    :param expert_evaluation: evaluation results of the expert policy
    :param expert_trajectory: recorded expert trajectory (converted to arrays here)
    :param streaming_enviroment: streaming environment used for the rollout
    :param trace_list: which traces we evaluate
    :param video_csv_list: which videos we evaluate
    :param add_data: forwarded to ``score_comparison``
    :return: result of ``score_comparison``
    """
    expert_trajectory.convert_list()
    rollout_generator = TrajectoryVideoStreaming(
        self, streaming_enviroment,
        trace_list=trace_list, video_csv_list=video_csv_list)
    # Turn each observation of the expert trajectory into a feature row.
    feature_rows = np.array([
        self.classifier.extract_features_observation(observation)
        for observation, _, _ in tqdm(expert_trajectory.trajectory_list, desc='transforming')
    ])
    feature_frame = pd.DataFrame(feature_rows, columns=self.classifier.extract_features_names())
    # Classifier cannot handle NaNs, so fill them in place first.
    self.impute_NaN_inplace(feature_frame)
    expert_action = expert_trajectory.trajectory_action_t_arr.ravel()
    approx_action = self.classifier.predict(feature_frame)
    # Roll out the cloned policy deterministically on a single core.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = rollout_generator.create_trajectories(
        random_action_probability=0, cores_avail=1)
    return self.score_comparison(
        expert_evaluation=expert_evaluation,
        expert_trajectory=expert_trajectory,
        expert_action=expert_action,
        approx_evaluation=behavioural_cloning_evaluation,
        approx_trajectory=behavioural_cloning_evaluation_trajectory,
        approx_action=approx_action,
        add_data=add_data)
def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list, video_csv_list, add_data=False):
    """Score the cloned neural policy against the expert policy.

    Uses the precomputed state arrays of the expert trajectory, predicts
    actions with the policy network (argmax over the last axis), rolls out
    the cloned policy on the given traces/videos, and delegates the
    comparison to ``score_comparison``.

    :param expert_evaluation: evaluation results of the expert policy
    :param expert_trajectory: recorded expert trajectory (converted to arrays here)
    :param streaming_enviroment: streaming environment used for the rollout
    :param trace_list: which traces we evaluate
    :param video_csv_list: which videos we evaluate
    :param add_data: forwarded to ``score_comparison``
    :return: result of ``score_comparison``
    """
    expert_trajectory.convert_list()
    rollout_generator = TrajectoryVideoStreaming(
        self, streaming_enviroment,
        trace_list=trace_list, video_csv_list=video_csv_list)
    current_states = expert_trajectory.trajectory_state_t_arr
    future_states = expert_trajectory.trajectory_state_t_future
    # The network outputs per-action scores; take the highest-scoring action.
    approx_action = self.policy_network.model.predict([current_states, future_states]).argmax(-1)
    expert_action = expert_trajectory.trajectory_action_t_arr.ravel()
    # Roll out the cloned policy deterministically on a single core.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = rollout_generator.create_trajectories(
        random_action_probability=0, cores_avail=1)
    return self.score_comparison(
        expert_evaluation=expert_evaluation,
        expert_trajectory=expert_trajectory,
        expert_action=expert_action,
        approx_evaluation=behavioural_cloning_evaluation,
        approx_trajectory=behavioural_cloning_evaluation_trajectory,
        approx_action=approx_action,
        add_data=add_data)
def score(self, expert_evaluation, expert_trajectory: Trajectory, streaming_enviroment, trace_list, video_csv_list, add_data=False):
    """Score the cloned ABR policy learner against the expert policy.

    Extracts features from the expert trajectory, predicts actions with the
    ABR policy learner, rolls out the cloned policy on the given
    traces/videos, and delegates the comparison to ``score_comparison``.

    :param expert_evaluation: evaluation results of the expert policy
    :param expert_trajectory: recorded expert trajectory (converted to arrays here)
    :param streaming_enviroment: streaming environment used for the rollout
    :param trace_list: which traces we evaluate
    :param video_csv_list: which videos we evaluate
    :param add_data: forwarded to ``score_comparison``
    :return: result of ``score_comparison``
    """
    expert_trajectory.convert_list()
    behavioural_cloning_trace_generator_testing = TrajectoryVideoStreaming(
        self, streaming_enviroment,
        trace_list=trace_list, video_csv_list=video_csv_list)
    # Turn each observation of the expert trajectory into a feature row.
    state_t = np.array([
        self.abr_policy_learner.extract_features_observation(observation)
        for observation, _, _ in tqdm(expert_trajectory.trajectory_list, desc='transforming')
    ])
    state_t = pd.DataFrame(state_t, columns=self.abr_policy_learner.extract_features_names())
    # NOTE(review): unlike the sibling variants, no NaN imputation happens here —
    # presumably the ABR policy learner tolerates NaNs; confirm.
    # Flatten to 1-D for consistency with the sibling score() implementations,
    # which pass a raveled expert action array to score_comparison.
    expert_action = expert_trajectory.trajectory_action_t_arr.ravel()
    approx_action = self.abr_policy_learner.predict(state_t)
    # Roll out the cloned policy deterministically on a single core.
    behavioural_cloning_evaluation, behavioural_cloning_evaluation_trajectory = behavioural_cloning_trace_generator_testing.create_trajectories(
        random_action_probability=0, cores_avail=1)
    return self.score_comparison(
        expert_evaluation=expert_evaluation,
        expert_trajectory=expert_trajectory,
        expert_action=expert_action,
        approx_evaluation=behavioural_cloning_evaluation,
        approx_trajectory=behavioural_cloning_evaluation_trajectory,
        approx_action=approx_action,
        add_data=add_data)