Exemplos de load_action_space em Python, exemplos de markov.agent_ctrl.utils.load_action_space em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: training_agent_ctrl.py Projeto: Chuba-Oraka/workshop-ML-20200822

 def __init__(self, agent_name, path_to_json):
     """Create the controller for a named agent and load its action space.

     Args:
         agent_name: String containing the name of the agent.
         path_to_json: String containing the absolute path to the model
             meta data json that holds the action space.
     """
     # The agent name is used to set the agent's position on the track
     self._agent_name_ = agent_name
     # load_action_space is unpacked as a pair; only the first item is kept
     loaded_space, _ignored = load_action_space(path_to_json)
     self._action_space_ = loaded_space

Exemplo n.º 2

0

Exibir arquivo

    def __init__(self, agent_name, model_metadata):
        """Initialize the training agent controller.

        Args:
            agent_name (str): name of the agent
            model_metadata (ModelMetadata): object containing the details in
                the model metadata json file
        """
        # The agent name is used to set the agent's position on the track
        self._agent_name_ = agent_name
        # Derive the action space from the metadata, then keep the metadata itself
        action_space = load_action_space(model_metadata)
        self._action_space_ = action_space
        self._model_metadata_ = model_metadata

Exemplo n.º 3

0

Exibir arquivo

 def __init__(self, config_dict, run_phase_sink, metrics):
     '''Set up the agent controller state, ROS publishers and services, then reset the agent.

        config_dict - Dictionary containing all the keys in ConfigParams
        run_phase_sink - Sink to receive notification of a change in run phase;
                         this controller registers itself with it
        metrics - Metrics object kept on the instance as self._metrics
     '''
     # Metrics object supplied by the caller
     self._metrics = metrics
     self._is_continuous = config_dict[const.ConfigParams.IS_CONTINUOUS.value]
     self._is_reset = False
     self._pause_count = 0
     # reset rules manager
     self._reset_rules_manager = construct_reset_rules_manager(config_dict)
     # Controller status dictionary; the agent phase starts out as RUN
     self._ctrl_status = dict()
     self._ctrl_status[AgentCtrlStatus.AGENT_PHASE.value] = AgentPhase.RUN.value
     self._config_dict = config_dict
     # Reset count limit and penalty values are read straight from the configuration
     self._number_of_resets = config_dict[const.ConfigParams.NUMBER_OF_RESETS.value]
     self._off_track_penalty = config_dict[const.ConfigParams.OFF_TRACK_PENALTY.value]
     self._collision_penalty = config_dict[const.ConfigParams.COLLISION_PENALTY.value]
     self._pause_end_time = 0.0
     self._reset_count = 0
     # simapp_version speed scale
     self._speed_scale_factor_ = get_speed_factor(config_dict[const.ConfigParams.VERSION.value])
     # Store the name of the agent used to set agents position on the track
     self._agent_name_ = config_dict[const.ConfigParams.AGENT_NAME.value]
     # Store the name of the links in the agent, this should be const
     self._agent_link_name_list_ = config_dict[const.ConfigParams.LINK_NAME_LIST.value]
     # Store the reward function
     self._reward_ = config_dict[const.ConfigParams.REWARD.value]
     self._track_data_ = TrackData.get_instance()
     # Create publishers for controlling the car, one per configured topic
     self._velocity_pub_dict_ = OrderedDict()
     self._steering_pub_dict_ = OrderedDict()
     for topic in config_dict[const.ConfigParams.VELOCITY_LIST.value]:
         self._velocity_pub_dict_[topic] = rospy.Publisher(topic, Float64, queue_size=1)
     for topic in config_dict[const.ConfigParams.STEERING_LIST.value]:
         self._steering_pub_dict_[topic] = rospy.Publisher(topic, Float64, queue_size=1)
     # Create default reward parameters
     self._reward_params_ = const.RewardParam.make_default_param()
     # Create the default metrics dictionary
     self._step_metrics_ = StepMetrics.make_default_metric()
     # State variable to track if the car direction has been reversed
     self._reverse_dir_ = False
     # Dictionary of bools indicating starting position behavior
     self._start_pos_behavior_ = \
         {'change_start' : config_dict[const.ConfigParams.CHANGE_START.value],
          'alternate_dir' : config_dict[const.ConfigParams.ALT_DIR.value]}
     # Dictionary to track the previous way points
     self._prev_waypoints_ = {'prev_point' : Point(0, 0), 'prev_point_2' : Point(0, 0)}
     # Dictionary containing some of the data for the agent
     self._data_dict_ = {'max_progress': 0.0,
                         'current_progress': 0.0,
                         'prev_progress': 0.0,
                         'steps': 0.0,
                         'start_ndist': 0.0}
     # Load the action space; the second returned value is kept as the json actions
     self._action_space_, self._json_actions_ = \
         load_action_space(config_dict[const.ConfigParams.ACTION_SPACE_PATH.value])
     #! TODO evaluate if this is the best way to reset the car
     # Wait for both model state services before creating the proxies for them
     rospy.wait_for_service(SET_MODEL_STATE)
     rospy.wait_for_service(GET_MODEL_STATE)
     self.set_model_state = ServiceProxyWrapper(SET_MODEL_STATE, SetModelState)
     self.get_model_client = ServiceProxyWrapper(GET_MODEL_STATE, GetModelState)
     # Adding the reward data publisher
     self.reward_data_pub = RewardDataPublisher(self._agent_name_, self._json_actions_)
     # init time
     self.last_time = 0.0
     self.curr_time = 0.0
     # Shared camera manager instance
     self.camera_manager = CameraManager.get_instance()
     # True if the agent is in the training phase
     self._is_training_ = False
     # Subscribe to the simulation clock; messages are handled by self._update_sim_time
     rospy.Subscriber('/clock', Clock, self._update_sim_time)
     # Register to the phase sink
     run_phase_sink.register(self)
     # Make sure velocity and angle are set to 0
     send_action(self._velocity_pub_dict_, self._steering_pub_dict_, 0.0, 0.0)
     # Pose on the center line at start_ndist scaled by the track length
     start_pose = self._track_data_._center_line_.interpolate_pose(self._data_dict_['start_ndist'] * self._track_data_.get_track_length(),
                                                                   reverse_dir=self._reverse_dir_,
                                                                   finite_difference=FiniteDifference.FORWARD_DIFFERENCE)
     # Register the agent with the track data at the start pose
     self._track_data_.initialize_object(self._agent_name_, start_pose, ObstacleDimensions.BOT_CAR_DIMENSION)
     # Keep the model state returned by the get-model-state service for this agent
     self.car_model_state = self.get_model_client(self._agent_name_, '')
     # Finish construction by resetting the agent to the start position
     self._reset_agent(reset_pos=const.ResetPos.START_POS.value)

Exemplo n.º 4

0

Exibir arquivo

Arquivo: rollout_agent_ctrl.py Projeto: pandeyankit83/amazon-sagemaker-example

    def __init__(self, config_dict, run_phase_sink, metrics):
        '''Set up the rollout agent controller from the configuration dictionary.

           config_dict (dict): containing all the keys in ConfigParams
           run_phase_sink (RunPhaseSubject): Sink to receive notification of a change in run phase
           metrics (EvalMetrics/TrainingMetrics): Training or evaluation metrics
        '''
        # Metrics object supplied by the caller
        self._metrics = metrics
        self._is_continuous = config_dict[
            const.ConfigParams.IS_CONTINUOUS.value]
        # reset rules manager
        self._reset_rules_manager = construct_reset_rules_manager(config_dict)
        # Controller status dictionary; the agent phase starts out as RUN
        self._ctrl_status = dict()
        self._ctrl_status[
            AgentCtrlStatus.AGENT_PHASE.value] = AgentPhase.RUN.value
        self._config_dict = config_dict
        # Falls back to the builtin `any` when no done condition is configured
        self._done_condition = config_dict.get(
            const.ConfigParams.DONE_CONDITION.value, any)
        self._number_of_resets = config_dict[
            const.ConfigParams.NUMBER_OF_RESETS.value]
        self._off_track_penalty = config_dict[
            const.ConfigParams.OFF_TRACK_PENALTY.value]
        self._collision_penalty = config_dict[
            const.ConfigParams.COLLISION_PENALTY.value]
        self._pause_duration = 0.0
        self._reset_count = 0
        self._curr_crashed_object_name = ''
        # simapp_version speed scale
        self._speed_scale_factor_ = get_speed_factor(
            config_dict[const.ConfigParams.VERSION.value])
        # Store the name of the agent used to set agents position on the track
        self._agent_name_ = config_dict[const.ConfigParams.AGENT_NAME.value]
        # Racecar index used to pick the start lane. This is only supported for a two-agent H2H race
        self._agent_idx_ = get_racecar_idx(self._agent_name_)
        # Get track data
        self._track_data_ = TrackData.get_instance()
        # Odd index -> inner lane, even index -> outer lane, no index -> center line
        if self._agent_idx_ is not None:
            self._start_lane_ = self._track_data_.inner_lane \
                if self._agent_idx_ % 2 else self._track_data_.outer_lane
        else:
            self._start_lane_ = self._track_data_.center_line
        # Store the name of the links in the agent, this should be const
        self._agent_link_name_list_ = config_dict[
            const.ConfigParams.LINK_NAME_LIST.value]
        # Store the reward function
        self._reward_ = config_dict[const.ConfigParams.REWARD.value]
        # Create publishers for controlling the car, one per configured topic
        self._velocity_pub_dict_ = OrderedDict()
        self._steering_pub_dict_ = OrderedDict()
        for topic in config_dict[const.ConfigParams.VELOCITY_LIST.value]:
            self._velocity_pub_dict_[topic] = rospy.Publisher(topic,
                                                              Float64,
                                                              queue_size=1)
        for topic in config_dict[const.ConfigParams.STEERING_LIST.value]:
            self._steering_pub_dict_[topic] = rospy.Publisher(topic,
                                                              Float64,
                                                              queue_size=1)
        # Create default reward parameters
        self._reward_params_ = const.RewardParam.make_default_param()
        # Create the default metrics dictionary
        self._step_metrics_ = StepMetrics.make_default_metric()
        # Dictionary of bools indicating starting position behavior
        self._start_pos_behavior_ = \
            {'change_start' : config_dict[const.ConfigParams.CHANGE_START.value],
             'alternate_dir' : config_dict[const.ConfigParams.ALT_DIR.value]}
        # Dictionary to track the previous way points
        self._prev_waypoints_ = {
            'prev_point': Point(0, 0),
            'prev_point_2': Point(0, 0)
        }

        # Normalized distance of new start line from the original start line of the track.
        start_ndist = 0.0

        # Normalized start position offset w.r.t. start_ndist, which is the start line of the track.
        # The configured start position (0.0 when absent) is divided by the track length.
        start_pos_offset = config_dict.get(
            const.ConfigParams.START_POSITION.value, 0.0)
        self._start_line_ndist_offset = start_pos_offset / self._track_data_.get_track_length(
        )

        # Dictionary containing some of the data for the agent
        # - During the reset call, every value except start_ndist will get wiped out by self._clear_data
        #   (reset happens prior to the beginning of every episode)
        # - If self._start_line_ndist_offset is not 0 (usually some negative value),
        #   then the initial current_progress is supposed to be non-zero (usually some negative value),
        #   as progress is supposed to be based on start_ndist.
        # - This will be correctly calculated by the first call of the utils.compute_current_prog function.
        #   As prev_progress will initially be 0.0 and the physical position is not at start_ndist,
        #   utils.compute_current_prog will return negative progress if self._start_line_ndist_offset is
        #   negative (meaning behind the start line) and positive progress if self._start_line_ndist_offset
        #   is positive (meaning ahead of the start line).
        self._data_dict_ = {
            'max_progress': 0.0,
            'current_progress': 0.0,
            'prev_progress': 0.0,
            'steps': 0.0,
            'start_ndist': start_ndist,
            'prev_car_pose': 0.0
        }

        # Load the action space; the second returned value is kept as the json actions
        self._action_space_, self._json_actions_ = \
            load_action_space(config_dict[const.ConfigParams.ACTION_SPACE_PATH.value])
        #! TODO evaluate if this is the best way to reset the car
        # Adding the reward data publisher
        self.reward_data_pub = RewardDataPublisher(self._agent_name_,
                                                   self._json_actions_)
        # Shared camera manager instance
        self.camera_manager = CameraManager.get_instance()
        # True if the agent is in the training phase
        self._is_training_ = False
        # Register to the phase sink
        run_phase_sink.register(self)
        # Make sure velocity and angle are set to 0
        send_action(self._velocity_pub_dict_, self._steering_pub_dict_, 0.0,
                    0.0)

        # start_dist should be hypothetical start line (start_ndist) plus
        # start position offset (start_line_ndist_offset).
        start_pose = self._start_lane_.interpolate_pose(
            (self._data_dict_['start_ndist'] + self._start_line_ndist_offset) *
            self._track_data_.get_track_length(),
            finite_difference=FiniteDifference.FORWARD_DIFFERENCE)
        # Register the agent with the track data at the start pose
        self._track_data_.initialize_object(self._agent_name_, start_pose, \
                                            ObstacleDimensions.BOT_CAR_DIMENSION)

        # Helper that turns a link state into a Point built from its x/y position
        self.make_link_points = lambda link_state: Point(
            link_state.pose.position.x, link_state.pose.position.y)
        # One empty reference frame string per agent link name
        self.reference_frames = ['' for _ in self._agent_link_name_list_]

        self._pause_car_model_pose = None
        self._park_position = DEFAULT_PARK_POSITION
        # Initialize the AbstractTracker base with high priority
        AbstractTracker.__init__(self, TrackerPriority.HIGH)

Exemplo n.º 5

0

Exibir arquivo

    def __init__(self, config_dict, run_phase_sink, metrics):
        '''Set up the agent controller from the configuration dictionary.

           config_dict - Dictionary containing all the keys in ConfigParams
           run_phase_sink - Sink to receive notification of a change in run phase;
                            this controller registers itself with it
           metrics - Metrics object kept on the instance as self._metrics
        '''
        # Metrics object supplied by the caller
        self._metrics = metrics
        self._is_continuous = config_dict[
            const.ConfigParams.IS_CONTINUOUS.value]
        self._is_reset = False
        # reset rules manager
        self._reset_rules_manager = construct_reset_rules_manager(config_dict)
        # Controller status dictionary; the agent phase starts out as RUN
        self._ctrl_status = dict()
        self._ctrl_status[
            AgentCtrlStatus.AGENT_PHASE.value] = AgentPhase.RUN.value
        self._config_dict = config_dict
        # Falls back to the builtin `any` when no done condition is configured
        self._done_condition = config_dict.get(
            const.ConfigParams.DONE_CONDITION.value, any)
        self._number_of_resets = config_dict[
            const.ConfigParams.NUMBER_OF_RESETS.value]
        self._off_track_penalty = config_dict[
            const.ConfigParams.OFF_TRACK_PENALTY.value]
        self._collision_penalty = config_dict[
            const.ConfigParams.COLLISION_PENALTY.value]
        self._pause_duration = 0.0
        self._reset_count = 0
        self._curr_crashed_object_name = None
        self._last_crashed_object_name = None
        # simapp_version speed scale
        self._speed_scale_factor_ = get_speed_factor(
            config_dict[const.ConfigParams.VERSION.value])
        # Store the name of the agent used to set agents position on the track
        self._agent_name_ = config_dict[const.ConfigParams.AGENT_NAME.value]
        # Racecar index used to pick the start lane. This is only supported for a two-agent H2H race
        self._agent_idx_ = get_racecar_idx(self._agent_name_)
        # Get track data
        self._track_data_ = TrackData.get_instance()
        # Odd index -> inner lane, even index -> outer lane, no index -> center line
        if self._agent_idx_ is not None:
            self._start_lane_ = self._track_data_.inner_lane \
                if self._agent_idx_ % 2 else self._track_data_.outer_lane
        else:
            self._start_lane_ = self._track_data_.center_line
        # Store the name of the links in the agent, this should be const
        self._agent_link_name_list_ = config_dict[
            const.ConfigParams.LINK_NAME_LIST.value]
        # Store the reward function
        self._reward_ = config_dict[const.ConfigParams.REWARD.value]
        # Create publishers for controlling the car, one per configured topic
        self._velocity_pub_dict_ = OrderedDict()
        self._steering_pub_dict_ = OrderedDict()
        for topic in config_dict[const.ConfigParams.VELOCITY_LIST.value]:
            self._velocity_pub_dict_[topic] = rospy.Publisher(topic,
                                                              Float64,
                                                              queue_size=1)
        for topic in config_dict[const.ConfigParams.STEERING_LIST.value]:
            self._steering_pub_dict_[topic] = rospy.Publisher(topic,
                                                              Float64,
                                                              queue_size=1)
        # Create default reward parameters
        self._reward_params_ = const.RewardParam.make_default_param()
        # Create the default metrics dictionary
        self._step_metrics_ = StepMetrics.make_default_metric()
        # Dictionary of bools indicating starting position behavior
        self._start_pos_behavior_ = \
            {'change_start' : config_dict[const.ConfigParams.CHANGE_START.value],
             'alternate_dir' : config_dict[const.ConfigParams.ALT_DIR.value]}
        # Dictionary to track the previous way points
        self._prev_waypoints_ = {
            'prev_point': Point(0, 0),
            'prev_point_2': Point(0, 0)
        }
        # Convert the configured start position (0.0 when absent) into a normalized
        # start_ndist by dividing it by the track length
        start_ndist = config_dict.get(const.ConfigParams.START_POSITION.value, 0.0) / \
            self._track_data_.get_track_length()
        # Dictionary containing some of the data for the agent
        self._data_dict_ = {
            'max_progress': 0.0,
            'current_progress': 0.0,
            'prev_progress': 0.0,
            'steps': 0.0,
            'start_ndist': start_ndist
        }
        # Load the action space; the second returned value is kept as the json actions
        self._action_space_, self._json_actions_ = \
            load_action_space(config_dict[const.ConfigParams.ACTION_SPACE_PATH.value])
        #! TODO evaluate if this is the best way to reset the car
        # Adding the reward data publisher
        self.reward_data_pub = RewardDataPublisher(self._agent_name_,
                                                   self._json_actions_)
        # Shared camera manager instance
        self.camera_manager = CameraManager.get_instance()
        # True if the agent is in the training phase
        self._is_training_ = False
        # Register to the phase sink
        run_phase_sink.register(self)
        # Make sure velocity and angle are set to 0
        send_action(self._velocity_pub_dict_, self._steering_pub_dict_, 0.0,
                    0.0)
        # Pose at start_ndist scaled back by the track length.
        # NOTE(review): this interpolates on center_line even though self._start_lane_
        # was selected above - confirm that is intended.
        start_pose = self._track_data_.center_line.interpolate_pose(
            self._data_dict_['start_ndist'] *
            self._track_data_.get_track_length(),
            finite_difference=FiniteDifference.FORWARD_DIFFERENCE)
        # Register the agent with the track data at the start pose
        self._track_data_.initialize_object(self._agent_name_, start_pose, \
                                            ObstacleDimensions.BOT_CAR_DIMENSION)

        # Helper that turns a link state into a Point built from its x/y position
        self.make_link_points = lambda link_state: Point(
            link_state.pose.position.x, link_state.pose.position.y)
        # One empty reference frame string per agent link name
        self.reference_frames = ['' for _ in self._agent_link_name_list_]

        self._pause_car_model_pose = None
        self._park_position = DEFAULT_PARK_POSITION
        # Initialize the AbstractTracker base with high priority
        AbstractTracker.__init__(self, TrackerPriority.HIGH)

Exemplo n.º 6

0

Exibir arquivo

Arquivo: rollout_agent_ctrl.py Projeto: pmsharkKOR/sagemaker-rl-kr

 def __init__(self, config_dict):
     '''Set up the rollout agent controller from the configuration dictionary.

        config_dict - Dictionary containing all the keys in ConfigParams
     '''
     # simapp_version speed scale
     self._speed_scale_factor_ = get_speed_factor(
         config_dict[const.ConfigParams.VERSION.value])
     # Store the name of the agent used to set agents position on the track
     self._agent_name_ = config_dict[const.ConfigParams.AGENT_NAME.value]
     # Store the name of the links in the agent, this should be const
     self._agent_link_name_list_ = config_dict[
         const.ConfigParams.LINK_NAME_LIST.value]
     # Store the reward function
     self._reward_ = config_dict[const.ConfigParams.REWARD.value]
     self._track_data_ = TrackData.get_instance()
     # Create publishers for controlling the car, one per configured topic
     self._velocity_pub_dict_ = OrderedDict()
     self._steering_pub_dict_ = OrderedDict()
     for topic in config_dict[const.ConfigParams.VELOCITY_LIST.value]:
         self._velocity_pub_dict_[topic] = rospy.Publisher(topic,
                                                           Float64,
                                                           queue_size=1)
     for topic in config_dict[const.ConfigParams.STEERING_LIST.value]:
         self._steering_pub_dict_[topic] = rospy.Publisher(topic,
                                                           Float64,
                                                           queue_size=1)
     # Create default reward parameters
     self._reward_params_ = const.RewardParam.make_default_param()
     # Create the default metrics dictionary
     self._step_metrics_ = StepMetrics.make_default_metric()
     # State variable to track if the car direction has been reversed
     self._reverse_dir_ = False
     # Dictionary of bools indicating starting position behavior
     self._start_pos_behavior_ = \
         {'change_start' : config_dict[const.ConfigParams.CHANGE_START.value],
          'alternate_dir' : config_dict[const.ConfigParams.ALT_DIR.value]}
     # Dictionary to track the previous way points
     self._prev_waypoints_ = {
         'prev_point': Point(0, 0),
         'prev_point_2': Point(0, 0)
     }
     # Dictionary containing some of the data for the agent
     self._data_dict_ = {
         'prev_progress': 0.0,
         'steps': 0.0,
         'start_ndist': 0.0
     }
     # Load the action space; the second returned value is kept as the json actions
     self._action_space_, self._json_actions_ = \
         load_action_space(config_dict[const.ConfigParams.ACTION_SPACE_PATH.value])
     #! TODO evaluate if this is the best way to reset the car
     # Wait for both model state services before creating the proxies for them
     rospy.wait_for_service(SET_MODEL_STATE)
     rospy.wait_for_service(GET_MODEL_STATE)
     self.set_model_state = ServiceProxyWrapper(SET_MODEL_STATE,
                                                SetModelState)
     self.get_model_client = ServiceProxyWrapper(GET_MODEL_STATE,
                                                 GetModelState)
     # Adding the reward data publisher
     self.reward_data_pub = RewardDataPublisher(self._agent_name_,
                                                self._json_actions_)
     # init time
     self.last_time = 0.0
     self.curr_time = 0.0
     # Camera manager built with every member of CameraType
     camera_types = [camera for camera in CameraType]
     self.camera_manager = CameraManager(camera_types=camera_types)
     # Subscribe to the simulation clock; messages are handled by self.update_camera
     rospy.Subscriber('/clock', Clock, self.update_camera)
     # Make sure velocity and angle are set to 0
     send_action(self._velocity_pub_dict_, self._steering_pub_dict_, 0.0,
                 0.0)