示例#1
0
    def __init__(self, verbose=2, locals=locals, globals=globals):
        super(ActionMaskCallback, self).__init__(verbose)
        
        self.model = None  # type: BaseRLModel
        self.training_env = None  # type: Union[gym.Env, VecEnv, None]
        self.n_calls = 0  # Number of time the callback was called # type: int
        self.num_timesteps = 0  # type: int
        self.locals = None  # type: Dict[str, Any]
        self.globals = None  # type: Dict[str, Any]

        # Set all actions to valid.
        self.action_space = MultiDiscrete([3, 21, 21, 21, 21])
        self.action_mask = mask(self.action_space)
        self.action_mask[0] = [1, 0, 0]

        states = ['start', 'start_spring_forward', 'lift_leg', 'plant_leg', 'switch_leg']
        self.gait = Walk()
        self.gait.num_timesteps = self.num_timesteps
        self.gait.action_mask = mask(self.action_space)
        self.gait.action_mask[0] = [1, 0, 0]
        self.gait.swinging_leg = 'right'
        self.gait.terminal = False
        self.gait.start_spring_forward_reward = 0
        self.gait.start = True
        self.gait.step_count = 0
        self.gait.step_flag = False
        self.gait.log = False
        self.gait.starting_position = 4
        self.gait.teaching = None

        machine = Machine(self.gait, states=states, send_event=True, initial='start')

        # Setup for Pure Selector Orchestration
        machine.add_transition('move', 'start', 'plant_leg', conditions='brain_transition_plant_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask'])
        machine.add_transition('move', 'lift_leg', 'plant_leg', conditions='brain_transition_plant_leg', unless='is_start', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask'])
        machine.add_transition('move', 'plant_leg', 'switch_leg', conditions='is_swinging_leg_planted', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter', 'switch_legs', 'increment_step_count'], after=['set_mask'])

        #machine.add_transition('move', 'switch_leg', 'lift_leg', conditions='brain_transition_lift_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'])
        machine.add_transition('move', 'switch_leg', 'plant_leg', conditions='brain_transition_lift_leg', prepare=['log_iteration', 'set_mask'], before=['reset_iter_counter'], after=['set_mask'])

        machine.add_transition('reset', 'start', 'start', before=['reinstate', 'reset_step_counter', 'set_mask'])
        machine.add_transition('reset', 'plant_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask'])
        machine.add_transition('reset', 'switch_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask'])
        machine.add_transition('reset', 'lift_leg', 'start', before=['reinstate', 'reset_step_counter', 'set_mask'])

        #print('callback init')
        """
示例#2
0
    def _on_training_start(self) -> None:
        """
        This method is called before the first rollout starts.
        """
        #print('train start')
        self.engine.valid_actions = mask(self.action_space)
        self.engine.valid_actions[0] = [1, 0,
                                        0]  # Set to start in gait phase 1.
        self.engine.action = 1

        # Query the state to pass to def facts.
        sim = self.training_env.env_method('get_state')
        #print(sim[0]['state'][8], sim[0]['state'][13], sim[0]['action'])
        left_leg = {
            'side': 'left',
            'contact': sim[0]['state'][8],
            'position': sim[0]['legs'][1].position[0],
            'hip_angle': sim[0]['state'][4],
            'knee_angle': sim[0]['state'][6],
            'hip_height': sim[0]['state'][26],
            'knee_height': sim[0]['state'][24]
        }
        right_leg = {
            'side': 'right',
            'contact': sim[0]['state'][13],
            'position': sim[0]['legs'][3].position[0],
            'hip_angle': sim[0]['state'][9],
            'knee_angle': sim[0]['state'][11],
            'hip_height': sim[0]['state'][27],
            'knee_height': sim[0]['state'][25]
        }

        self.engine.reset(action=sim[0]['action'], left_position=left_leg['position'], left_contact=left_leg['contact'], \
            right_position=right_leg['position'], right_contact=right_leg['contact'], \
                left_hip_angle=left_leg['hip_angle'], left_knee_angle=left_leg['knee_angle'], \
                    right_hip_angle=right_leg['hip_angle'], right_knee_angle=right_leg['knee_angle'], \
                        left_hip_height=left_leg['hip_height'], right_hip_height=right_leg['hip_height'], \
                            left_knee_height=left_leg['knee_height'], right_knee_height=right_leg['knee_height'])
        #self.mask(self.action_space)()
        #self.engine.action = sim[0]['action']
        self.engine.run()
        self.action_mask = self.engine.valid_actions
        self.training_env.env_method('set_infos', self.engine.valid_actions)

        return True
示例#3
0
    def __init__(self, verbose=2, locals=locals, globals=globals):
        super(ActionMaskCallback, self).__init__(verbose)
        # Those variables will be accessible in the callback
        # (they are defined in the base class)
        # The RL model
        self.model = None  # type: BaseRLModel
        # An alias for self.model.get_env(), the environment used for training
        self.training_env = None  # type: Union[gym.Env, VecEnv, None]
        # Number of time the callback was called
        self.n_calls = 0  # type: int
        self.num_timesteps = 0  # type: int
        # local and global variables
        self.locals = None  # type: Dict[str, Any]
        self.globals = None  # type: Dict[str, Any]
        # The logger object, used to report things in the terminal
        # self.logger = None  # type: logger.Logger
        # # Sometimes, for event callback, it is useful
        # # to have access to the parent object
        # self.parent = None  # type: Optional[BaseCallback]
        self.engine = Machine_Teaching()
        self.action_mask = None

        # Set all actions to valid.
        self.action_space = MultiDiscrete([3, 21, 21, 21, 21])
        self.engine.action = 1  # gait phase instead of reading list, starts with 1, []
        self.engine.valid_actions = mask(self.action_space)
        self.engine.valid_actions[0] = [1, 0,
                                        0]  # Set to start in gait phase 1.
        self.engine.value_list = [
            -1, -0.9, -0.8, -0.7, -0.6, -0.5, -0.4, -0.3, -0.2, -0.1, 0, 0.1,
            0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1
        ]
        #self.engine.valid_actions[0] = [1, 0, 0]
        self.engine.mask_left_hip = []
        self.engine.mask_left_knee = []
        self.engine.mask_right_hip = []
        self.engine.mask_right_knee = []
        self.engine.mask_width = 1
        self.engine.sim = {}
        self.engine.possible_states = []
示例#4
0
    def reset(self):
        self.valid_actions = mask(self.action_space)
        self.valid_actions[0] = [1, 0, 0]
        self.terminal = False
        self._destroy()
        self.world.contactListener_bug_workaround = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_bug_workaround
        self.game_over = False
        self.prev_shaping = None
        self.scroll = 0.0
        self.lidar_render = 0

        W = VIEWPORT_W / SCALE
        H = VIEWPORT_H / SCALE

        self._generate_terrain(self.hardcore)
        self._generate_clouds()

        init_x = TERRAIN_STEP * TERRAIN_STARTPAD / 2
        init_y = TERRAIN_HEIGHT + 2 * LEG_H
        self.hull = self.world.CreateDynamicBody(position=(init_x, init_y),
                                                 fixtures=HULL_FD)
        self.hull.color1 = (0.5, 0.4, 0.9)
        self.hull.color2 = (0.3, 0.3, 0.5)
        self.hull.ApplyForceToCenter(
            (self.np_random.uniform(-INITIAL_RANDOM, INITIAL_RANDOM), 0), True)

        self.legs = []
        self.joints = []
        for i in [
                -1, +1
        ]:  # Original position is right leg slightly forward, left leg slightly back.
            leg = self.world.CreateDynamicBody(
                position=(init_x, init_y - LEG_H / 2 - LEG_DOWN),
                angle=(i * 0.15),  # *0.05
                fixtures=LEG_FD)
            leg.color1 = (0.6 - i / 10., 0.3 - i / 10., 0.5 - i / 10.)
            leg.color2 = (0.4 - i / 10., 0.2 - i / 10., 0.3 - i / 10.)
            rjd = revoluteJointDef(
                bodyA=self.hull,
                bodyB=leg,
                localAnchorA=(0, LEG_DOWN),
                localAnchorB=(0, LEG_H / 2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed=i,
                lowerAngle=-0.8,
                upperAngle=1.1,
            )
            self.legs.append(leg)
            self.joints.append(self.world.CreateJoint(rjd))

            lower = self.world.CreateDynamicBody(
                position=(init_x, init_y - LEG_H * 3 / 2 - LEG_DOWN),
                angle=(i * 0.15),  # *0.05
                fixtures=LOWER_FD)
            lower.color1 = (0.6 - i / 10., 0.3 - i / 10., 0.5 - i / 10.)
            lower.color2 = (0.4 - i / 10., 0.2 - i / 10., 0.3 - i / 10.)
            rjd = revoluteJointDef(
                bodyA=leg,
                bodyB=lower,
                localAnchorA=(0, -LEG_H / 2),
                localAnchorB=(0, LEG_H / 2),
                enableMotor=True,
                enableLimit=True,
                maxMotorTorque=MOTORS_TORQUE,
                motorSpeed=1,
                lowerAngle=-1.6,
                upperAngle=-0.1,
            )
            lower.ground_contact = False
            self.legs.append(lower)
            self.joints.append(self.world.CreateJoint(rjd))

        self.drawlist = self.terrain + self.legs + [self.hull]

        self.counter = 0

        #print('in reset', self.counter)

        class LidarCallback(Box2D.b2.rayCastCallback):
            def ReportFixture(self, fixture, point, normal, fraction):
                if (fixture.filterData.categoryBits & 1) == 0:
                    return -1
                self.p2 = point
                self.fraction = fraction
                return fraction

        self.lidar = [LidarCallback() for _ in range(10)]

        self.terminal = False
        self.action = []
        self.state = {}
        empty_observation = [init_x, init_x, init_x, init_x, init_x]
        self.position_history = deque(empty_observation)
        self.state_history = deque([None, None, None, None, None])
        self.leg_history = deque([
            (self.legs[0].position[0], self.legs[2].position[0]),
            (self.legs[0].position[0], self.legs[2].position[0]),
            (self.legs[0].position[0], self.legs[2].position[0]),
            (self.legs[0].position[0], self.legs[2].position[0]),
            (self.legs[0].position[0], self.legs[2].position[0])
        ])
        if self.state_machine is not None and self.state_machine.state != 'start':
            self.state_machine.reset()  # reset the state of the state machine.
            self.state_machine.num_timesteps = 0  # reset the iteration counter.
            self.state_machine.swinging_leg = 'right'
            self.state_machine.start = True
            self.state_machine.action_mask = mask(self.action_space)
            #print('resetting state machine')
            self.state_machine.action_mask[0] = [1, 0, 0]
            #print('reset ', self.state_machine.num_timesteps)
        elif self.state_machine is not None:
            self.state_machine.num_timesteps = 0  # reset the iteration counter.
            self.state_machine.swinging_leg = 'right'
            #print('reset ', self.state_machine.num_timesteps)
        return self.step(np.array([0, 10, 10, 10, 10]))[0]
示例#5
0
import imageio

from gym.spaces import MultiDiscrete
import stable_baselines.common.walker_action_mask as walker
from stable_baselines import PPO2, A2C
from stable_baselines.common.evaluation import evaluate_policy
from stable_baselines.common.vec_env import DummyVecEnv
from curiosity_mask.util import create_dummy_action_mask as mask

# Create environment.
env = DummyVecEnv([walker.BipedalWalker])

brain = PPO2.load("walker_brain_1a")

# Evaluate the agent.
#mean_reward, n_steps = evaluate_policy(brain, brain.get_env(), n_eval_episodes=1)
#print(mean_reward)

action_space = MultiDiscrete([3, 21, 21, 21, 21])
action_mask = mask(action_space)

# Predict from the trained agent and record animated gif.
#images = []
obs, done, action_masks = env.reset(), [False], []
for i in range(1000):
    action, _states = brain.predict(obs, action_mask=action_masks)
    obs, rewards, dones, info = env.step(action)
    env.render()
    #images.append(env.render(mode='rgb_array'))

#imageio.mimsave('./logs/walker-ppo2-curiosity-mask.gif', images, fps=29)
    def __init__(self, verbose=2, locals=locals, globals=globals):
        super(ActionMaskCallback, self).__init__(verbose)
        # Those variables will be accessible in the callback
        # (they are defined in the base class)
        # The RL model
        self.model = None  # type: BaseRLModel
        # An alias for self.model.get_env(), the environment used for training
        self.training_env = None  # type: Union[gym.Env, VecEnv, None]
        # Number of time the callback was called
        self.n_calls = 0  # type: int
        self.num_timesteps = 0  # type: int
        # local and global variables
        self.locals = None  # type: Dict[str, Any]
        self.globals = None  # type: Dict[str, Any]
        # The logger object, used to report things in the terminal
        # self.logger = None  # type: logger.Logger
        # # Sometimes, for event callback, it is useful
        # # to have access to the parent object
        # self.parent = None  # type: Optional[BaseCallback]

        # Set all actions to valid.
        self.action_space = MultiDiscrete([3, 21, 21, 21, 21])
        self.action_mask = mask(self.action_space)
        self.action_mask[0] = [1, 0, 0]

        states = [
            'lift_leg', 'fuzzy_transition_1', 'plant_leg', 'switch_leg',
            'fuzzy_transition_2'
        ]
        # Fuzzy transition 1 determines how long to lift swinging leg before planting it.
        # Fuzzy transition 2 determines how long to swing switched leg before lifing it.
        self.gait = Walk()
        self.gait.num_timesteps = self.num_timesteps
        self.gait.action_mask = mask(self.action_space)
        self.gait.action_mask[0] = [1, 0, 0]
        self.gait.swinging_leg = 'right'
        self.gait.terminal = False
        self.gait.start = True
        machine = Machine(self.gait,
                          states=states,
                          send_event=True,
                          initial='start')
        #machine.add_transition('move', 'lift_leg', 'plant_leg', conditions='is_swinging_leg_lifted', prepare='set_mask_gait_1', before=['set_mask_gait_2', 'reset_step_counter'])
        #machine.add_transition('move', 'plant_leg', 'switch_leg', conditions='is_swinging_leg_planted', prepare='set_mask_gait_2', before=['set_mask_gait_2', 'reset_step_counter'])
        #machine.add_transition('move', 'switch_leg', 'lift_leg', conditions='is_swinging_leg_leading', prepare='set_mask_gait_3', before=['set_mask_gait_2', 'reset_step_counter'])

        machine.add_transition('move',
                               'start',
                               'fuzzy_transition_1',
                               conditions='is_swinging_leg_lifted',
                               prepare='set_mask_start',
                               before=['set_mask_start', 'reset_step_counter'])
        machine.add_transition(
            'move',
            'lift_leg',
            'fuzzy_transition_1',
            conditions='is_swinging_leg_lifted',
            unless='is_start',
            prepare='set_mask_gait_1',
            before=['set_mask_gait_1', 'reset_step_counter'])
        machine.add_transition(
            'move',
            'fuzzy_transition_1',
            'plant_leg',
            conditions='done_lifting_swinging_leg',
            prepare=['set_mask_gait_1', 'set_fuzzy_decision1'],
            before=['terminate', 'reset_step_counter', 'set_mask_gait_2'])
        machine.add_transition(
            'move',
            'plant_leg',
            'switch_leg',
            conditions='is_swinging_leg_planted',
            prepare='set_mask_gait_2',
            before=['reset_step_counter', 'switch_legs', 'set_mask_gait_3'])
        machine.add_transition('move',
                               'switch_leg',
                               'fuzzy_transition_2',
                               conditions='is_swinging_leg_leading',
                               prepare='set_mask_gait_3',
                               before=['set_mask_gait_3'])
        machine.add_transition(
            'move',
            'fuzzy_transition_2',
            'lift_leg',
            conditions='start_lifting_swinging_leg',
            prepare=['set_mask_gait_3', 'set_fuzzy_decision2'],
            before=['reset_step_counter', 'set_mask_gait_1'])

        machine.add_transition('reset',
                               'fuzzy_transition_1',
                               'start',
                               before=['reinstate', 'set_mask_gait_1'])
        machine.add_transition('reset',
                               'plant_leg',
                               'start',
                               before=['reinstate', 'set_mask_gait_1'])
        machine.add_transition('reset',
                               'switch_leg',
                               'start',
                               before=['reinstate', 'set_mask_gait_1'])
        machine.add_transition('reset',
                               'fuzzy_transition_2',
                               'start',
                               before=['reinstate', 'set_mask_gait_1'])
        machine.add_transition('reset',
                               'lift_leg',
                               'start',
                               before=['reinstate', 'set_mask_gait_1'])
        #print('callback init')
        self.ui = UI()
        self.ui.event, self.ui.values = self.ui.window.read()
        if self.ui.event == 'Cancel':
            self.ui.window.close()
        elif self.ui.event == 'Reset':
            self.training_env.env_method('reset')
        elif self.ui.event in (None, 'Submit'):
            self.gait.teaching = self.translate_ui(self.ui.values)
示例#7
0
    def set_mask(self, event):

        """
        if event.kwargs.get('action') is not None and len(event.kwargs.get('action')) == 5:
            gait = event.kwargs.get('action')[0]
        else:
            gait = 0
        #"""
        if event.kwargs.get('action') is None:
            gait = 0
        #print(self.state)
        if self.state == 'start' or self.state == 'lift_leg':
            gait = 0
            gait_action_mask = [1, 0, 0]
        if self.state == 'plant_leg':
            gait = 1
            gait_action_mask = [0, 1, 0]
        if self.state == 'switch_leg':
            gait = 2
            gait_action_mask = [0, 0, 1]

        teaching = self.teaching # event.kwargs.get('teaching')
        gait_ref = str(gait+1)
        #gait_action_mask = self.action_mask[0]
        self.action_mask = mask(MultiDiscrete([3, 21, 21, 21, 21]))
        self.action_mask[0] = gait_action_mask

        # Flip bits to teach the strategy for this gait. (Left Hip, Left Knee, Right, Hip, Right Knee)
        start_crouch_length = 6
        switch_crouch_length = 8

        if self.swinging_leg == 'left':

            ranges = {'left_hip': {'min': teaching['gait-' + gait_ref + '-swinging-hip-min'], 'max': teaching['gait-' + gait_ref + '-swinging-hip-max']},
                    'left_knee': {'min': teaching['gait-' + gait_ref + '-swinging-knee-min'], 'max': teaching['gait-' + gait_ref + '-swinging-knee-max']}, 
                    'right_hip': {'min': teaching['gait-' + gait_ref + '-planted-hip-min'], 'max': teaching['gait-' + gait_ref + '-planted-hip-max']}, 
                    'right_knee': {'min': teaching['gait-' + gait_ref + '-planted-knee-min'], 'max': teaching['gait-' + gait_ref + '-planted-knee-max']}
                }
            #"""
            if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps <= start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps <= switch_crouch_length):

                ranges['right_hip']['min'] = 10

            if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps > start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps > switch_crouch_length):

                ranges['right_hip']['max'] = 10
            #"""
            self.action_mask = gait_mask(gait, ranges, self.action_mask)

        if self.swinging_leg == 'right':

            ranges = {'left_hip': {'min': teaching['gait-' + gait_ref + '-planted-hip-min'], 'max': teaching['gait-' + gait_ref + '-planted-hip-max']},
                'left_knee': {'min': teaching['gait-' + gait_ref + '-planted-knee-min'], 'max': teaching['gait-' + gait_ref + '-planted-knee-max']}, 
                'right_hip': {'min': teaching['gait-' + gait_ref + '-swinging-hip-min'], 'max': teaching['gait-' + gait_ref + '-swinging-hip-max']}, 
                'right_knee': {'min': teaching['gait-' + gait_ref + '-swinging-knee-min'], 'max': teaching['gait-' + gait_ref + '-swinging-knee-max']}
            }
            #"""
            if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps <= start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps <= switch_crouch_length):

                ranges['left_hip']['min'] = 10

            if ((self.state == 'start' or self.state == 'lift_leg') and self.num_timesteps > start_crouch_length) or (self.state == 'switch_leg' and self.num_timesteps > switch_crouch_length):

                ranges['left_hip']['max'] = 10
            #"""
            #print('gait mask details: ', gait, ranges, self.action_mask[1])
            self.action_mask = gait_mask(gait, ranges, self.action_mask)