Example #1
    def __init__(self,
                 env,
                 max_length=np.inf,
                 dense_reward=True,
                 save_fr=10,
                 save_dest="state_box",
                 render=False):
        Env.__init__(self)
        DictSerializable.__init__(self, DictSerializable.get_numpy_save())
        self.eval_env = env
        # Define action and observation space.
        # They must be gym.spaces objects; here they are
        # delegated from the wrapped environment.
        if env is not None:
            self.action_space = self.eval_env.action_space
            self.observation_space = self.eval_env.observation_space
        self._dense_reward = dense_reward
        self.partial_reward = 0.
        self.partial_length = 0
        self.returns = []
        self.episode_lengths = []
        self.successes = []
        self._unused = True
        self._max_length = max_length
        self.max_episode_steps = max_length
        self._save_fr = save_fr
        self._save_dest = save_dest
        self._render = render
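The snippet above delegates the wrapped environment's spaces rather than defining its own. A minimal, self-contained sketch of that delegation pattern (the class name EvalWrapper is hypothetical; the original snippet does not show the class line):

    import gym
    from gym import Env

    class EvalWrapper(Env):
        # Hypothetical wrapper: expose the wrapped env's interface unchanged.
        def __init__(self, env):
            Env.__init__(self)
            self.eval_env = env
            self.action_space = env.action_space
            self.observation_space = env.observation_space

    wrapped = EvalWrapper(gym.make("CartPole-v1"))
    assert wrapped.action_space == wrapped.eval_env.action_space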
Example #2
    def __init__(self, config, visualize=True):
        Env.__init__(self)

        self._config = config
        self._step_dt = config['env']['step_dt']
        self._model_name = "mug"

        # setup DPS wrapper
        self._diagram_wrapper = DrakePusherSliderDiagramWrapper(config)

        # setup the simulator
        # add procedurally generated table
        env_utils.add_procedurally_generated_table(self.diagram_wrapper.mbp,
                                                   config['env']['table'])

        self.diagram_wrapper.add_pusher()

        self.add_object_model()

        self.diagram_wrapper.finalize()
        self.diagram_wrapper.export_ports()

        if visualize:
            self.diagram_wrapper.connect_to_meshcat()
            self.diagram_wrapper.connect_to_drake_visualizer()

        self.diagram_wrapper.add_sensors_from_config(config)
        self.diagram_wrapper.build()

        # records port indices
        self._port_idx = dict()

        # add the PID controller and remaining systems
        builder = DiagramBuilder()
        self._builder = builder
        # print("type(self.diagram_wrapper.diagram)", type(self.diagram_wrapper.diagram))
        builder.AddSystem(self.diagram_wrapper.diagram)

        # need to connect actuator ports
        # set the controller gains
        pid_data = self.diagram_wrapper.add_pid_controller(builder=builder)
        self._port_idx["pid_input_port_desired_state"] = pid_data[
            'pid_input_port_index']

        diagram = builder.Build()
        self._diagram = diagram
        self._pid_input_port_desired_state = self._diagram.get_input_port(
            self._port_idx["pid_input_port_desired_state"])

        # setup simulator
        context = diagram.CreateDefaultContext()
        self._simulator = Simulator(self._diagram, context)
        self._sim_initialized = False
        self._context = context

        # reset env
        self.reset()
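For context, this constructor follows Drake's standard build-then-simulate flow: add systems to a DiagramBuilder, call Build(), create a default context, and hand both to a Simulator. A minimal stand-alone sketch of that flow, using a trivial source system in place of the pusher-slider diagram:

    from pydrake.systems.analysis import Simulator
    from pydrake.systems.framework import DiagramBuilder
    from pydrake.systems.primitives import ConstantVectorSource

    builder = DiagramBuilder()
    builder.AddSystem(ConstantVectorSource([1.0]))  # stand-in for the wrapped diagram
    diagram = builder.Build()
    context = diagram.CreateDefaultContext()
    simulator = Simulator(diagram, context)
    simulator.AdvanceTo(0.1)  # advance the simulation by 0.1 s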
Example #3
    def __init__(self, grid: Grid):

        Env.__init__(self)
        GameEnv.__init__(self, grid)
        self.FPS = -1
        self.clock = None

        # Define action and observation space
        # They must be gym.spaces objects
        # Using 3 discrete actions:
        self.action_space = spaces.Discrete(3)
        # Using image as input:
        self.observation_space = spaces.Box(low=0, high=255,
                                            shape=(grid.x * grid.scale, grid.y * grid.scale, 3), dtype=np.uint8)
        self.reset()
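The Discrete/Box pair above is the standard way to declare a discrete-action, image-observation interface. A quick self-contained check of how those spaces behave (the 64x64 shape is illustrative, not from the snippet):

    import numpy as np
    from gym import spaces

    action_space = spaces.Discrete(3)  # actions 0, 1, 2
    observation_space = spaces.Box(low=0, high=255,
                                   shape=(64, 64, 3), dtype=np.uint8)
    assert action_space.contains(action_space.sample())
    assert observation_space.contains(observation_space.sample())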
Example #4
    def __init__(self, action_mapping):
        Env.__init__(self)
        self._seed()
        self.verbose = 0
        self.viewer = None
        self.batch_size = 32
        self.optimizer = None
        self.model = None
        self.current_step = 0
        self.action_mapping = action_mapping
        self.action_space = action_mapping.action_space
        bounds = float('inf')
        self.observation_space = spaces.Box(-bounds, bounds, (4,))
        self.best = None
        self.evaluate_test = False
Example #5
    def __init__(self,
                 num_healthy,
                 num_contaminated,
                 world_size,
                 min_obs_rad,
                 max_obs_rad,
                 torus=False,
                 dynamics='direct'):
        Env.__init__(self)
        self.num_healthy = num_healthy
        self.num_contaminated = num_contaminated
        self.world_size = world_size
        self.min_obs_rad = min_obs_rad
        self.max_obs_rad = max_obs_rad
        self.torus = torus
        self.world = base.World(world_size, torus, dynamics)
        self.global_actor = GlobalActor(min_obs_rad, max_obs_rad)
        # constructed for its side effects; the instance is not kept
        # (presumably ClusterManager registers itself globally)
        ClusterManager(self.max_obs_rad)
        self.reset()
Example #6
    def __init__(self, sess=None):
        '''Robust linear regression environment.'''
        Env.__init__(self)
        # a dictionary specifying configurations
        # It will not change later.
        self.configs = {
            # the number of problems; each (X, Y) data set is a different problem
            # (i.e. a different objective function).
            "problem_num": 120,
            # the number of data for each problem in each type.
            "num_data_each": 25,
            # the number of data types for one problem.
            "num_data_type": 4,
            # the maximum number of times the agent is allowed to optimize this problem.
            "max_opt_times": 1000,
            # the dimension for input data 'x' and also the dimension of weight.
            "x_dim": 3,
            # 'c' is the constant specified in the paper.
            "c": 1,
            # the horizon used in the paper's common settings.
            "horizon": 25,
        }

        logger.log("Robust Linear Regression environment initializing...")

        # generate data sampling operations
        self.distributions = [
            self._rand_Gaussian_Dist()
            for _ in range(self.configs["num_data_type"])
        ]
        self.sample_data_ops = [
            dist.sample(self.configs["num_data_each"])
            for dist in self.distributions
        ]
        self.perturbation_dist = tfd.Normal(loc=0., scale=1.)
        self.perturbation_ops = self.perturbation_dist.sample(
            self.configs["num_data_each"])

        # assign tf session
        if sess is not None:
            self.sess = sess
        else:
            self.sess = tf.Session()

        # setup the formula
        self.vars = {}
        with tf.variable_scope("regression-objective"):
            # placeholders for stored values; these become numpy arrays at run time.
            self.vars["w_val"] = None
            self.vars["b_val"] = None
            # equation placeholder
            self.vars["w"] = tf.placeholder(tf.float32,
                                            shape=(self.configs["x_dim"], ),
                                            name="w")
            self.vars["b"] = tf.placeholder(tf.float32, shape=(1, ), name="b")
            self.vars["x"] = tf.placeholder(tf.float32,
                                            shape=(self.configs["x_dim"],
                                                   None),
                                            name="x")
            self.vars["y"] = tf.placeholder(tf.float32,
                                            shape=(1, None),
                                            name="y")

            w_reshape = tf.reshape(self.vars["w"], [1, self.configs["x_dim"]])
            # per the equation, "dom" is the parenthesized term that gets squared.
            self.vars["dom"] = self.vars["y"] - (
                tf.matmul(w_reshape, self.vars["x"]) - self.vars["b"])
            self.vars["squared"] = tf.math.square(self.vars["dom"])
            self.vars["each"] = tf.divide(
                self.vars["squared"],
                (self.vars["squared"] + self.configs["c"] * self.configs["c"]))
            self.vars["loss"] = tf.reduce_mean(self.vars["each"], axis=1)
            # gradients for the controller information
            self.vars["gradients"] = tf.gradients(
                self.vars["loss"], [self.vars["w"], self.vars["b"]])

        # initialize problem data; saving/loading of data is not implemented
        self.all_data = [
            self._generate_data() for _ in range(self.configs["problem_num"])
        ]
        self.data_ind = 0  # index used to retrieve data from the full collection

        # reset the environment for starting
        self.reset()
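The TF1 graph above encodes the robust loss mean_i(r_i^2 / (r_i^2 + c^2)) with residual r = y - (w^T x - b). A NumPy sketch of the same computation, handy for sanity-checking the graph (names and shapes follow the placeholders above):

    import numpy as np

    def robust_regression_loss(w, b, x, y, c=1.0):
        # w: (x_dim,), b: scalar, x: (x_dim, n), y: (1, n)
        dom = y - (w.reshape(1, -1) @ x - b)  # residual, matching vars["dom"]
        squared = dom ** 2
        return np.mean(squared / (squared + c * c))

    x = np.random.randn(3, 25)
    y = np.random.randn(1, 25)
    print(robust_regression_loss(np.ones(3), 0.5, x, y))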
Example #7
    def __init__(self,
                 c,
                 worker_id,
                 start_position,
                 with_step_penalty,
                 with_revisit_penalty,
                 stay_inside,
                 with_color_reward,
                 total_reward,
                 covered_steps_ratio,
                 depth_channel_first=True,
                 changing_start_positions=False,
                 as_image=False,
                 color_on_visit=True):
        Env.__init__(self)
        log.info('creating environment for files {}'.format(c.data_files))
        # attributes required to mimic the gym.Env interface
        self.reward_range = None
        self.metadata = {'render.modes': []}
        self.spec = None
        self.enabled = False
        self.observation_space = None

        self.c = c
        # First channel
        # 0 - blank cell
        # 1 - pattern cell
        # Second channel
        # 0 - not stitched
        # 1 - stitched
        # Third channel
        # 0 - no agent
        # 1 - agent
        self.with_step_penalty = with_step_penalty
        self.with_revisit_penalty = with_revisit_penalty
        self.stay_inside = stay_inside
        self.action_encodings = {0: 'u', 1: 'd', 2: 'l', 3: 'r'}
        self.with_color_reward = with_color_reward
        self.total_reward = total_reward
        self.covered_steps_ratio = covered_steps_ratio
        self.inv_action_encodings = {
            v: k
            for k, v in self.action_encodings.items()
        }
        self.action_space = spaces.Discrete(len(self.action_encodings))
        self.layer_descriptions = OrderedDict([
            (ColoringEnv.channel_pattern, 'Pattern'),
            (ColoringEnv.channel_stitch, 'Completed pattern'),
            (ColoringEnv.channel_agent, 'Agent position'),
        ])
        self.worker_id = worker_id
        self.start_position = start_position
        self.env_reset_count = 0
        self.steps = []
        self.emb_pattern_layer = None
        self.emb_pattern_count = 0
        self.x_dim = None
        self.y_dim = None
        self.base_observation = None
        self.initial_observation = None
        self.max_steps = -1
        self.step_count = 0
        self.data_file = None
        self.done = False
        self.depth_channel_first = depth_channel_first
        self.changing_start_positions = changing_start_positions
        self.as_image = as_image
        self.color_on_visit = color_on_visit
        self.alice_state = None
        self.init_uncovered_count = 0
        self.reset()
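The three observation channels described in the comments (pattern / stitched / agent) imply a stack of binary grids. A hypothetical sketch of such an observation, with illustrative dimensions and cell positions not taken from the snippet:

    import numpy as np

    # depth_channel_first=True layout: (channel, row, col)
    obs = np.zeros((3, 5, 5), dtype=np.uint8)
    obs[0, 1:4, 1:4] = 1  # channel 0: pattern cells
    obs[1, 1, 1] = 1      # channel 1: cells already stitched
    obs[2, 2, 2] = 1      # channel 2: agent position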