def _reset_(self): """Resets the environment episode. Moves the arm to either fixed reference or random position and resets targets and target generators """ print("Resetting") if(self._move_shape_ == 'circle'): startpoint = np.zeros(3) startpoint[self._circle_plane_-2] += self._circle_radius_ startpoint += self._circle_centrepoint_ x_target = np.zeros(3) np.copyto(x_target, startpoint) self._move_generator_ = self._circle_generator_(self._circle_plane_) elif(self._move_shape_ == 'line'): x_target = self._line_midpoint_ self._move_generator_ = self._line_generator_(self._line_dir_) np.copyto(self._x_target_, x_target) self._target_ = self._x_target_[self._end_effector_indices] self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high) self._cmd_prev_ = np.zeros(len(self._action_low)) # to be used with derivative control of velocity if self._reset_type != 'none': if self._reset_type == 'random': reset_angles, _ = self._pick_random_angles_() elif self._reset_type == 'zero': reset_angles = self._q_ref[self._joint_indices] self._reset_arm(reset_angles) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state() ) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) print("Reset done")
def _reset_(self): """Resets the environment episode. Moves the arm to either fixed reference or random position and generates a new target from _target_generator_. """ print("Resetting") x_target = self._target_generator_.__next__() np.copyto(self._x_target_, x_target) self._target_ = self._x_target_[self._end_effector_indices] self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high) self._cmd_prev_ = np.zeros(len(self._action_low)) # to be used with derivative control of velocity if self._reset_type != 'none': if self._reset_type == 'random': reset_angles, _ = self._pick_random_angles_() elif self._reset_type == 'zero': reset_angles = self._q_ref[self._joint_indices] self._reset_arm(reset_angles) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state() ) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) print("Reset done")
def _reset_(self): """ Resets the environment episode. Moves the DXL to either fixed reference or random position and generates a new target within a bounding box. """ print("Resetting") if self.reset_type == 'zero': self._reset_pos_.value = self.reset_pos_center elif self.reset_type == 'random': self._reset_pos_.value = self._rand_obj_.uniform( low=self.angle_low, high=self.angle_high) self._target_pos_.value = self._rand_obj_.uniform(low=self.angle_low, high=self.angle_high) error_prior = 0 integral = 0 present_pos = 0.0 # Once in the correct regime, the `present_pos` values can be trusted start_time = time.time() while time.time() - start_time < 5: if self._sensor_comms[self._comm_name].sensor_buffer.updated(): sensor_window, timestamp_window, index_window = self._sensor_comms[ self._comm_name].sensor_buffer.read_update(1) present_pos = sensor_window[0][self.reg_index['present_pos']] current_temperature = sensor_window[0][ self.reg_index['temperature']] if current_temperature > self.cool_down_temperature: print("Starting to overheat. sleep for a few seconds") time.sleep(10) error = self._reset_pos_.value - present_pos if abs(error) > 0.017: # ~1 deg integral = integral + (error * self.gripper_dt) derivative = (error - error_prior) / self.gripper_dt action = self.kp * error + self.ki * integral + self.kd * derivative error_prior = error else: break self._actuator_comms[self._comm_name].actuator_buffer.write( action) time.sleep(0.001) self._actuator_comms[self._comm_name].actuator_buffer.write(0) self.episode_steps = 0 rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state()) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) time.sleep( 0.1 ) # Give the shared buffer time to get updated and prevent false episode done conditions print("Reset done. Gripper pos: {}".format(present_pos))
def _compute_actuation_(self, action, timestamp, index): if self.crash_flag.value == 2: raise Exception("Compute actuation crash triggered.") self._actuation_packet_['generic1'] = action self._actuation_packet_['generic2'] = action values = self._rand_obj_.uniform(-1, +1, 3) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state() ) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) np.copyto(self._uniform_array_, values)
def _reset_(self, q_start=None, q_target=None): """Resets the environment episode. Moves the arm to either fixed reference or random position and generates a new target within a safety box. """ print("Resetting") if q_target is None: self._q_target_, x_target = self._pick_random_angles_() else: self._q_target = self._q_ref.copy() self._q_target[self._joint_indices] = q_target _, inside_buffer_bound, _, x_target = self._check_bound(self._q_target) assert inside_buffer_bound, "Target Joint Angle should be in boundary" np.copyto(self._x_target_, x_target) if self._target_type == 'position': self._target_ = self._x_target_[self._end_effector_indices] elif self._target_type == 'angle': self._target_ = self._q_target_ self._action_ = self._rand_obj_.uniform(self._action_low, self._action_high) self._cmd_prev_ = np.zeros(len(self._action_low)) # to be used with derivative control of velocity if q_start is not None: reset_angles = self._q_ref.copy() reset_angles[self._joint_indices] = q_start _, inside_buffer_bound, _, _ = self._check_bound(reset_angles) assert inside_buffer_bound, "Invalid q start: {}".format(q_start) if self._safety_mode_ == ur_utils.SafetyModes.PROTECTIVE_STOP: print("Unlocking p-stop") self._actuator_comms['UR5'].actuator_buffer.write(self._pstop_unlock_packet) time.sleep(2.0) self._reset_arm(q_start) elif self._reset_type != 'none': if self._reset_type == 'random': reset_angles, _ = self._pick_random_angles_() elif self._reset_type == 'zero': reset_angles = self._q_ref[self._joint_indices] self._reset_arm(reset_angles) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state() ) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) print("Reset done")
def test_random_state_array(self): rand_obj = np.random.RandomState(1) rand_state = rand_obj.get_state() original_uniform_values = rand_obj.uniform(-1, 1, 100) original_normal_values = rand_obj.randn(100) rand_state_array_type, rand_state_array_size, rand_state_array = get_random_state_array(rand_state) shared_rand_array = np.frombuffer(Array('b', rand_state_array_size).get_obj(), dtype=rand_state_array_type) np.copyto(shared_rand_array, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) new_rand_obj = np.random.RandomState() new_rand_obj.set_state(get_random_state_from_array(shared_rand_array)) new_uniform_values = new_rand_obj.uniform(-1, 1, 100) new_normal_values = new_rand_obj.randn(100) assert np.all(original_uniform_values == new_uniform_values) assert np.all(original_normal_values == new_normal_values)
def _reset_(self): """The required _reset_ interface. This method does the handling of charging the Create2, repositioning, and set to the correct mode. """ logging.info("Resetting...") self._episode_step_.value = 0 np.copyto(self._prev_action_, np.array([0, 0])) for d in self._observation_def: d.reset() # wait for create2 to startup properly if just started (ie. wait to actually start receiving observation) while not self._sensor_comms[self._comm_name].sensor_buffer.updated(): time.sleep(0.01) sensor_window, _, _ = self._sensor_comms[ self._comm_name].sensor_buffer.read() if sensor_window[-1][0]['battery charge'] <= self._min_battery: logging.info("Waiting for Create2 to be manually docked.") self._wait_until_charged() sensor_window, _, _ = self._sensor_comms[ self._comm_name].sensor_buffer.read() # Always switch to SAFE mode to run an episode, so that Create2 will switch to PASSIVE on the # charger. If the create2 is in any other mode on the charger, we will not be able to detect # the non-responsive sleep mode that happens at the 60 seconds mark. logging.info("Setting Create2 into safe mode.") self._write_opcode('safe') time.sleep(0.1) # after charging/docked, try to drive away from the dock if still on it if sensor_window[-1][0]['charging sources available'] > 0: logging.info("Undocking the Create2.") self._write_opcode('drive_direct', -300, -300) time.sleep(1.0) self._write_opcode('drive_direct', 0, 0) time.sleep(0.1) # drive backward for a fixed amount logging.info("Moving Create2 into position.") target_values = [-150, -150] self._write_opcode('drive_direct', *target_values) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array( self._rand_obj_.get_state()) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) time.sleep(1.5) self._write_opcode('drive', 0, 0) # find the rotation angle (right wheel distance - left wheel distance) / wheel base distance self._total_rotation += (target_values[1] * 1.5 - target_values[0] * 1.5) / 235.0 * 180.0 / 3.14 self._wait_until_unwinded() # make sure in SAFE mode in case the random drive caused switch to PASSIVE, or # create2 stuck somewhere and require human reset (don't want an episode to start # until fixed, otherwise we get a whole bunch of one step episodes) sensor_window, _, _ = self._sensor_comms[ self._comm_name].sensor_buffer.read() while sensor_window[-1][0]['oi mode'] != 2: logging.warning( "Create2 not in SAFE mode, reattempting... (might require human intervention)." ) self._write_opcode('full') time.sleep(0.2) self._write_opcode('drive_direct', -50, -50) time.sleep(0.5) self._write_opcode('drive', 0, 0) time.sleep(0.1) self._write_opcode('safe') time.sleep(0.2) # try another unwind since unwind could have failed if stuck in PASSIVE mode self._wait_until_unwinded() sensor_window, _, _ = self._sensor_comms[ self._comm_name].sensor_buffer.read() # don't want to state during reset pollute the first sensation time.sleep(2 * self._internal_timing) logging.info("Reset completed.")
def __init__(self, communicator_setups, action_dim, observation_dim, run_mode='multiprocess', dt=.1, dt_tol=1e-5, sleep_time=0.0001, busy_loop=True, random_state=None, **kwargs ): """Inits RTRLBaseEnv object with task specific parameters. Args: communicator_setups: A dictionary containing configuration parameters for each physical device communicator. The form of this dictionary is as follows: config = {'name':{'Communicator': CommunicatorClass, 'kwargs': dict() } } action_dim: An integer dimensionality of the action space observation_dim: An integer dimensionality of the observation space run_mode: A string specifying the method of parallelism between the agent and the environment, one of 'singlethread', 'multithread', or 'multiprocess'. dt: A float timestep duration to maintain when calling 'step' or 'sense_wait' dt_tol: a float small tolerance subtracted from dt to compensate for OS delays when exiting sleep() in 'step' or 'sense_wait'. random_state: A tuple containing random state returned by numpy.random.RandomState().get_state(). This is to ensure reproducibility by reusing the same random state externally from an experiment script. sleep_time: a float representing lower bound on sleep() function time resolution provided by OS. For linux based OSes the resolution is typically ~0.001s, for Windows based OSes its ~0.01s. busy_loop: a boolean specifying whether to use busy loops instead of time.sleep() to accurately maintain short real time intervals. random_state: a numpy random state object to use for generating random sequences """ assert run_mode in ['singlethread', 'multithread', 'multiprocess'] self._run_mode = run_mode # Used for gym compatible step function self._dt = dt self._dt_tol = dt_tol self._sleep_time = sleep_time self._busy_loop = busy_loop # create random object based on passed random_state tuple self._rand_obj_ = np.random.RandomState() if random_state is None: random_state = self._rand_obj_.get_state() else: self._rand_obj_.set_state(random_state) rand_state_array_type, rand_state_array_size, rand_state_array = utils.get_random_state_array(random_state) # Ideally, the random state tuple of `_rand_obj_` needs to be copied to this `_shared_rstate_array_` # after every use of `_rand_obj_` for generating random numbers. self._shared_rstate_array_ = np.frombuffer(Array('b', rand_state_array_size).get_obj(), dtype=rand_state_array_type) np.copyto(self._shared_rstate_array_, np.frombuffer(rand_state_array, dtype=rand_state_array_type)) self._reset_flag = Value('i', 0) self._action_buffer = SharedBuffer( buffer_len=SharedBuffer.DEFAULT_BUFFER_LEN, array_len=action_dim, array_type='d', np_array_type='d', ) # Contains the observation vector, with the last element being the _reward_ self._sensation_buffer = SharedBuffer( buffer_len=SharedBuffer.DEFAULT_BUFFER_LEN, array_len=observation_dim + 2, array_type='d', np_array_type='d', ) # A dictionary of dictionaries, one for each communicator that is required self._communicator_setups = communicator_setups # Dictionary of all instantiated communicator processes # Both sensor_comms and actuator_comms would have a reference to the same # communicator when that communicator contains both sensors and actuators self._sensor_comms = {} self._actuator_comms = {} # Contains a reference to every communicator. # Used for terminating all communicators once when the environment is closed self._all_comms = {} # Dictionary containing actuation packets for each communicator self._actuation_packet_ = {} # Dictionary containing the number of sensor packets to read at a time # from each communicator self._num_sensor_packets = {} # Construct the communicators without starting for name, setup in communicator_setups.items(): # Initialize communicator with the given parameters comm = setup['Communicator'](**setup['kwargs']) if comm.use_actuator: self._actuation_packet_[name] = np.zeros( shape=comm.actuator_buffer.array_len, dtype=comm.actuator_buffer.np_array_type, ) self._actuator_comms[name] = comm if comm.use_sensor: if 'num_sensor_packets' in setup.keys(): self._num_sensor_packets[name] = setup['num_sensor_packets'] else: self._num_sensor_packets[name] = 1 self._sensor_comms[name] = comm self._all_comms[name] = comm self._running = False