示例#1
0
    def __init__(self, environment_filename, no_graphics):
        """Launch the Unity build and derive the action/observation spaces.

        :param environment_filename: forwarded to ``UnityEnvironment`` as
            ``file_name``.
        :param no_graphics: forwarded to ``UnityEnvironment`` as
            ``no_graphics``.
        """
        config_channel = EngineConfigurationChannel()
        self.env = UnityEnvironment(file_name=environment_filename,
                                    side_channels=[config_channel],
                                    no_graphics=no_graphics)
        self.env.reset()

        groups = self.env.get_agent_groups()
        self.brain_name = groups
        # Only the first agent group is inspected for its spec.
        self.group_spec = self.env.get_agent_group_spec(groups[0])
        config_channel.set_configuration_parameters(width=640,
                                                    height=480,
                                                    time_scale=3.0)
        # NOTE: group_name stores the whole group list, same as brain_name.
        self.group_name = groups

        # Action space: two identical lists of per-branch Discrete spaces for
        # discrete actions, otherwise a symmetric [-1, 1] Box.
        spec = self.group_spec
        if not spec.is_action_discrete():
            bound = np.array([1] * spec.action_shape)
            self._action_space = spaces.Box(-bound, bound, dtype=np.float32)
        else:
            branches = spec.discrete_action_branches
            self._action_space = [
                [spaces.Discrete(size) for size in branches]
                for _ in range(2)
            ]

        # Observation space: unbounded Box sized by the vector observation.
        limit = np.array([np.inf] * self._get_vec_obs_size())
        self._observation_space = spaces.Box(-limit, limit, dtype=np.float32)
示例#2
0
def train():
    """Train a DQN agent on the PushBlock Unity build and checkpoint it.

    The Unity process is started with a 20x time scale, wrapped as a gym
    environment and trained with ``deepq.learn``. The resulting model is
    saved under ``./model`` through a ``tf.train.CheckpointManager``.

    The Unity environment is closed on every exit path so that a finished,
    failed or interrupted run does not keep the Unity process and its
    communication port alive for the rest of the Python process.
    """
    engine_configuration_channel = EngineConfigurationChannel()
    # Set the time scale to 20x.
    engine_configuration_channel.set_configuration_parameters(time_scale=20.0)
    unity_env = UnityEnvironment("./ml-agents/Project/PushBlock", side_channels=[engine_configuration_channel])
    try:
        env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)
        logger.configure('./logs')
        # Train with DQN.
        model = deepq.learn(
            env,
            "mlp",
            seed=0,
            lr=2.5e-4,
            total_timesteps=400000,
            buffer_size=50000,
            exploration_fraction=0.05,
            exploration_final_eps=0.1,
            print_freq=20,
            train_freq=5,
            learning_starts=20000,
            target_network_update_freq=50,
            gamma=0.99,
            prioritized_replay=False,
            checkpoint_freq=1000,
            dueling=True,
            checkpoint_path=None,
            load_path="./model"
        )

        # Save the model.
        save_path = "./model"
        ckpt = tf.train.Checkpoint(model=model)
        manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=1)
        manager.save()
    finally:
        # Release the Unity process and its port even when training fails.
        unity_env.close()
示例#3
0
    def __init__(self,
                 unity_env,
                 time_scale=1.0,
                 width=720,
                 height=480,
                 target_frame_rate=60,
                 quality_level=5):
        """Reset the environment, install engine settings and cache specs.

        :param unity_env: (UnityEnvironment) Environment where the game will be played
        :param time_scale: (float) Speed of the game
        :param width: (int) Window's width
        :param height: (int) Window's height
        :param target_frame_rate: (int) Frame rate
        :param quality_level: (int) Visual quality
        """
        self.unity_env = unity_env
        self.unity_env.reset()

        engine_settings = dict(time_scale=time_scale,
                               width=width,
                               height=height,
                               target_frame_rate=target_frame_rate,
                               quality_level=quality_level)
        channel = EngineConfigurationChannel()
        channel.set_configuration_parameters(**engine_settings)
        # The channel is written straight into entry 2 of the environment's
        # side_channels container.
        # NOTE(review): presumably 2 is the engine-configuration slot of the
        # installed mlagents version -- confirm.
        self.unity_env.side_channels[2] = channel

        # Everything below describes the first agent group only.
        first_group = unity_env.get_agent_groups()[0]
        self.group_name = first_group
        self.group_spec = unity_env.get_agent_group_spec(first_group)
        step_result = self.unity_env.get_step_result(self.group_name)
        self.n_agents = step_result.n_agents()
        self.action_size = self.group_spec.action_size
示例#4
0
文件: learner.py 项目: jun-hyeok/test
    def initialize_env(self, config, env_file) -> Environment:
        """
        Build the environment and push the engine settings to it.

        The base port is read interactively from standard input; the time
        scale and window size are read from ``config`` and converted to int.

        Args:
            config: the configuration parameters.
            env_file: the environment file.
        Returns:
            env: Environment
        """
        # Environment configuration (prompt first, then the config lookups).
        port = int(input("Enter base port: "))
        speed, window_w, window_h = (
            int(config.get(key)) for key in ("time_scale", "width", "height")
        )

        engine_channel = EngineConfigurationChannel()
        params_channel = EnvironmentParametersChannel()
        environment = Environment(file_name=env_file,
                                  base_port=port,
                                  side_channels=[engine_channel, params_channel])

        engine_channel.set_configuration_parameters(
            time_scale=speed,
            quality_level=1,
            width=window_w,
            height=window_h,
        )

        environment.set_float_parameters(config)
        return environment
示例#5
0
def test_engine_configuration():
    """Check what EngineConfigurationChannel emits and what it rejects."""
    channel = EngineConfigurationChannel()
    # A raw-bytes channel on the same id is used to interpret the data.
    sink = RawBytesChannel(channel.channel_id)

    def deliver(target):
        # Serialise whatever the engine channel queued and hand it to target.
        payload = SideChannelManager([channel]).generate_side_channel_messages()
        SideChannelManager([target]).process_side_channel_message(payload)

    channel.set_configuration(EngineConfig.default_config())
    deliver(sink)
    # 5 different messages, one for each setting.
    assert len(sink.get_and_clear_received_messages()) == 5

    expected_time_scale = 4.5
    channel.set_configuration_parameters(time_scale=expected_time_scale)
    deliver(sink)

    first_message = IncomingMessage(sink.get_and_clear_received_messages()[0])
    # The leading int32 is read and discarded
    # (presumably the setting tag -- confirm).
    first_message.read_int32()
    assert first_message.read_float32() == expected_time_scale

    with pytest.raises(UnitySideChannelException):
        channel.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # Try to send data to the EngineConfigurationChannel itself.
        channel.set_configuration_parameters(time_scale=expected_time_scale)
        deliver(channel)
示例#6
0
文件: my_env.py 项目: Maxpridy/RLzoo
    def __init__(self, worker_id, realtime_mode=False):
        """Start the food-collector build and configure it for the run mode.

        :param worker_id: passed positionally to ``UnityEnvironment`` right
            after the environment path.
        :param realtime_mode: when true use time scale 1.0 and send
            "train-mode" = 0.0; otherwise time scale 20.0 and
            "train-mode" = 1.0.
        """
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"
        channels = [self.reset_parameters, self.engine_config]
        self._env = UnityEnvironment(env_path, worker_id, side_channels=channels)
        self._env.reset()

        # Work with the first behavior the environment reports.
        specs = self._env.behavior_specs
        self.behavior_name = list(specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        time_scale, train_flag = (1.0, 0.0) if realtime_mode else (20.0, 1.0)
        self.engine_config.set_configuration_parameters(time_scale=time_scale)
        self.reset_parameters.set_float_parameter("train-mode", train_flag)

        branches = behavior_spec.action_spec.discrete_branches
        self._flattener = ActionFlattener(branches)
示例#7
0
 def _create_engine_channel(self):
     """Return an engine channel preloaded with the train or play settings.

     The EngineConfig arguments are positional.
     NOTE(review): presumably width, height, quality level, time scale and
     target frame rate -- confirm against the installed mlagents version.
     """
     channel = EngineConfigurationChannel()
     if self.train_mode:
         settings = EngineConfig(80, 80, 1, 4.0, 30 * 4)
     else:
         settings = EngineConfig(1280, 720, 1, 1.0, 60)
     channel.set_configuration(settings)
     return channel
    def make_unity_env(self,
                       env_name,
                       float_params=None,
                       time_scale=1,
                       seed=None,
                       worker_id=None,
                       **kwargs):
        """
        creates a gym environment from a unity game

        env_name: str
            the path to the game
        float_params: dict or None
            this should be a dict of argument settings for the unity
            environment
            keys: varies by environment
        time_scale: float
            forwarded to the engine configuration channel as Unity's
            time scale
        seed: int or None
            the seed for randomness. None (the default) derives a seed
            from the current time at call time.
        worker_id: int or None
            must specify a unique worker id for each unity process
            on this machine. None derives one from the seed.

        Raises RuntimeError when no environment could be created after
        50 attempts; the last underlying error is chained as the cause.
        """
        # A fresh dict per call; a dict() default would be shared by all calls.
        if float_params is None:
            float_params = {}
        # The seed default is computed here rather than in the signature,
        # where time.time() would be evaluated once at definition time.
        # It is also forced to int because it feeds the worker id arithmetic.
        if seed is None:
            seed = time.time()
        seed = int(seed)

        path = os.path.expanduser(env_name)
        channel = EngineConfigurationChannel()
        env_channel = EnvironmentParametersChannel()
        channel.set_configuration_parameters(time_scale=time_scale)
        for k, v in float_params.items():
            if k == "validation" and v >= 1:
                print("Game in validation mode")
            env_channel.set_float_parameter(k, float(v))

        # Honour the caller's worker id; only derive one when none is given.
        if worker_id is None:
            worker_id = seed % 500 + 1

        max_attempts = 50
        env = None
        last_error = None
        for _ in range(max_attempts):
            try:
                env = UnityEnvironment(file_name=path,
                                       side_channels=[channel, env_channel],
                                       worker_id=worker_id,
                                       seed=seed)
                break
            except Exception as err:
                # Most likely the worker id (port) is taken: jump to another.
                last_error = err
                s = "Error encountered making environment, "
                s += "trying new worker_id"
                print(s)
                worker_id = (worker_id + 1 +
                             int(np.random.random() * 100)) % 500

        if env is None:
            raise RuntimeError(
                "Could not create Unity environment '{}' after {} attempts"
                .format(path, max_attempts)) from last_error

        return UnityToGymWrapper(env, allow_multiple_obs=True)
示例#9
0
class FQ_Env(object):
    """List-based adapter over a ``UnityEnvironment``, one entry per behavior.

    Every behavior name reported by the environment is treated as one agent.
    Only the first observation tensor and the first row/reward of each
    steps object are read.
    """

    def __init__(self):
        """Connect to Unity, set the time scale to 1 and cache the shapes."""
        self.engine_configuration_channel = EngineConfigurationChannel()
        self.env = UnityEnvironment(
            side_channels=[self.engine_configuration_channel])
        # Only time_scale is sent; width, height, quality_level,
        # target_frame_rate and capture_frame_rate are not overridden here.
        self.engine_configuration_channel.set_configuration_parameters(
            time_scale=1)

        self.reset()

        self.n = self.agent_num()

        self.state_shapes = [
            self.env.get_behavior_spec(name).observation_shapes[0][0]
            for name in self.env.get_behavior_names()
        ]
        self.action_dims = [
            self.env.get_behavior_spec(name).action_shape
            for name in self.env.get_behavior_names()
        ]

    def agent_num(self):
        """Return how many behavior names the environment reports."""
        return len(self.env.get_behavior_names())

    def reset(self):
        """Reset the environment and return one state per behavior."""
        self.env.reset()
        states = []
        for name in self.env.get_behavior_names():
            decision, _terminal = self.env.get_steps(name)
            states.append(decision.obs[0][0])
        return states

    def step(self, actions):
        """Apply one action per behavior, advance once and collect results.

        ``actions`` is indexed in the order of ``get_behavior_names()``.
        Returns ``(next_state, reward, done)`` as three parallel lists.
        """
        for idx, name in enumerate(self.env.get_behavior_names()):
            batch = np.asarray([actions[idx]])
            self.env.set_actions(behavior_name=name, action=batch)
        self.env.step()

        next_state, reward, done = [], [], []
        for name in self.env.get_behavior_names():
            decision, terminal = self.env.get_steps(name)
            # A non-empty terminal reward array marks the behavior as done.
            finished = len(terminal.reward) != 0
            source = terminal if finished else decision
            next_state.append(source.obs[0][0])
            reward.append(source.reward[0])
            done.append(finished)

        return next_state, reward, done

    def close(self):
        """Close the underlying Unity environment."""
        self.env.close()
示例#10
0
文件: main.py 项目: rapsealk/Mummy
def main():
    """
    Drive the "Mummy" build forever with uniformly random actions.

    UnityEnvironment arguments, for reference:

    file_name: is the name of the environment binary (located in the root directory of the python project)
    worker_id: indicates which port to use for communication with the environment. For use in parallel training regimes such as A3C.
    seed: indicates the seed to use when generating random numbers during the training process.
          In environments which are deterministic, setting the seed enables reproducible experimentation by ensuring that the environment and trainers utilize the same random seed.
    side_channels: provides a way to exchange data with the Unity simulation that is not related to the reinforcement learning loop.
                   For example: configurations or properties.
                   More on them in the "Modifying the environment from Python"(https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#modifying-the-environment-from-python) section.
    ---
    env.reset()
    env.step()
    env.close()

    The loop never terminates by itself, so the environment is closed in a
    ``finally`` clause: an interrupt or a communication error then still
    shuts the Unity process down and frees its port.
    """
    channel = EngineConfigurationChannel()

    filename = "Mummy"
    # The single behavior this script drives (printed by the loop below).
    behavior_name = "Mummy?team=0"
    env = UnityEnvironment(file_name=filename, seed=1, side_channels=[channel])
    try:
        channel.set_configuration_parameters(time_scale=2.0)

        env.reset()
        behavior_names = env.behavior_specs.keys()
        for name in behavior_names:
            print('behavior_name:', name)  # Mummy?team=0

        decision_steps, terminal_steps = env.get_steps(
            behavior_name=behavior_name)
        # Debug helpers, kept for reference:
        # print('DecisionSteps')
        # print('- observation:', decision_steps.obs)
        # print('- reward:', decision_steps.reward)
        # print('- agent_id:', decision_steps.agent_id)
        # print('- action_mask:', decision_steps.action_mask)
        #
        # print('TerminalSteps')
        # print('- observation:', terminal_steps.obs)
        # print('- reward:', terminal_steps.reward)
        # print('- agent_id:', terminal_steps.agent_id)
        # print('- interrupted:', terminal_steps.interrupted)

        while True:
            for i in decision_steps.agent_id:
                # Agents that just terminated get no action this step.
                if i in terminal_steps.agent_id:
                    continue
                env.set_action_for_agent(behavior_name=behavior_name,
                                         agent_id=i,
                                         action=np.random.uniform(-1.0,
                                                                  1.0,
                                                                  size=(2, )))
            env.step()

            decision_steps, terminal_steps = env.get_steps(
                behavior_name=behavior_name)
    finally:
        env.close()
示例#11
0
class UnityWrapper(object):
    """Owns a ``UnityEnvironment`` plus its engine and parameter channels.

    Public attributes that are not found on the wrapper are looked up on the
    wrapped environment.
    """

    def __init__(self, env_args):
        """Configure the channels and start (or attach to) the environment.

        ``env_args`` is a mapping. ``file_path`` set to None selects a
        connection on base port 5004 with no binary. Otherwise the binary at
        ``file_path`` is launched with ``--scene`` and ``--n_agents``
        arguments.
        """
        engine_channel = EngineConfigurationChannel()
        self.engine_configuration_channel = engine_channel
        if not env_args['train_mode']:
            engine_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        else:
            engine_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])

        self.float_properties_channel = EnvironmentParametersChannel()
        channels = [engine_channel, self.float_properties_channel]

        if env_args['file_path'] is not None:
            # Scene names are looked up in a yaml file under the working dir.
            yaml_path = '/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml'])
            unity_env_dict = load_yaml(yaml_path)
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=channels,
                additional_args=[
                    '--scene',
                    str(unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                           'None')),
                    '--n_agents',
                    str(env_args.get('env_num', 1)),
                ])
        else:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=channels)
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        """Send the reset parameters, then reset the environment.

        A truthy ``reset_config`` keyword overrides the stored default.
        """
        override = kwargs.get('reset_config', None)
        active = override if override else self.reset_config
        for key, value in active.items():
            self.float_properties_channel.set_float_parameter(key, value)
        self._env.reset()

    def __getattr__(self, name):
        """Delegate public attribute lookups to the wrapped environment."""
        if not name.startswith('_'):
            return getattr(self._env, name)
        raise AttributeError(
            "attempted to get missing private attribute '{}'".format(name))
示例#12
0
 def _create_env(self, env_file, time_scale, no_graphics):
     """Start a Unity environment for this process and set its time scale.

     The worker id comes from ``proc_id()``.
     """
     engine_channel = EngineConfigurationChannel()
     launch_options = dict(
         file_name=env_file,
         no_graphics=no_graphics,
         side_channels=[engine_channel],
         # See if setting a worker id allows spinning up more agents.
         worker_id=proc_id(),
     )
     unity_env = UnityEnvironment(**launch_options)
     engine_channel.set_configuration_parameters(time_scale=time_scale)
     return unity_env
示例#13
0
def _make_unity_env(
        env_path: Optional[str] = None,
        port: int = UnityEnvironment.BASE_ENVIRONMENT_PORT,
        seed: int = -1,
        env_args: Optional[List[str]] = None,
        engine_config: Optional[EngineConfig] = None,
        side_channels: Optional[List[SideChannel]] = None) -> UnityEnvironment:
    """
    Create a UnityEnvironment.

    :param env_path: environment binary; None connects to the Unity Editor
        on ``UnityEnvironment.DEFAULT_EDITOR_PORT``.
    :param port: first base port to try; it is incremented while the port
        is reported as in use.
    :param seed: forwarded to ``UnityEnvironment``.
    :param env_args: forwarded to ``UnityEnvironment`` as ``args``.
    :param engine_config: engine settings; the default config when None.
    :param side_channels: extra side channels. The list is not modified;
        the engine configuration channel is added to a copy.
    :raises UnityEnvironmentException: when ``env_path`` does not match any
        environment.
    """
    # Use Unity Editor if env file is not provided.
    if env_path is None:
        port = UnityEnvironment.DEFAULT_EDITOR_PORT
    else:
        launch_string = UnityEnvironment.validate_environment_path(env_path)
        if launch_string is None:
            raise UnityEnvironmentException(
                f"Couldn't launch the {env_path} environment. Provided filename does not match any environments."
            )
        logger.info(f"Starting environment from {env_path}.")

    # Configure Unity Engine.
    if engine_config is None:
        engine_config = EngineConfig.default_config()

    engine_configuration_channel = EngineConfigurationChannel()
    engine_configuration_channel.set_configuration(engine_config)

    # Build a new list instead of appending to the caller's one, so calling
    # this twice with the same list does not accumulate engine channels.
    all_channels = [*(side_channels or []), engine_configuration_channel]

    # Find an available port to connect to Unity environment.
    while True:
        try:
            env = UnityEnvironment(
                file_name=env_path,
                seed=seed,
                base_port=port,
                args=env_args,
                side_channels=all_channels,
            )
        except UnityWorkerInUseException:
            logger.debug(f"port {port} in use.")
            port += 1
        else:
            logger.info(f"Connected to environment using port {port}.")
            break

    return env
示例#14
0
 def initialize_all_side_channels(self, initialize_config, engine_config):
     """
     Initialize all communication channels.

     ``engine_config`` is expanded as keyword arguments for the engine
     channel. ``initialize_config`` entries are sent as float parameters
     after the ``env_copies`` parameter. Returns a dict holding both
     channels.
     """
     engine_channel = EngineConfigurationChannel()
     engine_channel.set_configuration_parameters(**engine_config)

     params_channel = EnvironmentParametersChannel()
     params_channel.set_float_parameter('env_copies', self._n_copies)
     for name, value in initialize_config.items():
         params_channel.set_float_parameter(name, value)

     return {
         'engine_configuration_channel': engine_channel,
         'float_properties_channel': params_channel,
     }
示例#15
0
    def __init__(self, n_episodes, env_name, model):
        """Open the environment, build the network, load ``model`` and test.

        The constructor runs the whole test: it finishes by calling
        ``self.load_model(model)`` followed by ``self.test()``.
        """
        # Number of episodes
        self.n_episodes = n_episodes

        # Environment (single-agent, vector observations, graphics enabled)
        self.env_name = env_name
        engine_channel = EngineConfigurationChannel()
        env_options = dict(worker_id=0,
                           use_visual=False,
                           side_channels=[engine_channel],
                           no_graphics=False,
                           multiagent=False)
        self.env = UnityEnv(self.env_name, **env_options)
        sizes = Utils.getActionStateSize(self.env)
        self.action_size, self.state_size = sizes

        # Model
        network = ActorCritic(self.state_size, self.action_size, seed=0)
        self.model = network.to(device)

        # Time step counter (for updating every "update_every" time steps)
        self.t_step = 1

        # Start test
        self.load_model(model)
        self.test()
示例#16
0
def get_env_info(env_name, args):
    """Start the environment and describe it from its first two behaviors.

    Returns ``(env, env_arg, engine_configuration_channel)``. ``env_arg``
    holds the agent/action counts taken from the second behavior's action
    mask, the state and observation widths, and a fixed episode limit
    of 160.
    """
    engine_configuration_channel = EngineConfigurationChannel()

    # No binary is launched when the Unity editor is requested.
    if args.run_unity_editor is True:
        binary = None
    else:
        binary = "envs/{0}".format(env_name)
    env = UnityEnvironment(file_name=binary,
                           side_channels=[engine_configuration_channel])

    env.reset()

    names = list(env.behavior_specs.keys())

    # First behavior: the coordinator, which provides the global state.
    coordinator_steps, _ = env.get_steps(names[0])
    state = coordinator_steps.obs[0]

    # Second behavior: the agents, which provide per-agent observations.
    agent_steps, _ = env.get_steps(names[1])
    obs = agent_steps.obs[0]

    first_mask = agent_steps.action_mask[0]
    env_arg = dict([
        ("n_agents", first_mask.shape[0]),
        ("n_actions", agent_steps.action_mask[0].shape[1]),
        ("state_shape", state.shape[1]),
        ("obs_shape", obs.shape[1]),
        ("episode_limit", 160),
    ])

    return env, env_arg, engine_configuration_channel
示例#17
0
    def __init__(self):
        """Set hyperparameters, open the multi-agent env and build the model."""

        # Hyperparameters: optimisation
        self.learning_rate = 0.0003
        self.betas = (0.9, 0.999)
        self.batch_size = 256
        self.buffer_size = 2048
        self.K_epochs = 3
        self.max_steps = 100000

        # Hyperparameters: objective
        self.gamma = 0.99
        self.tau = 0.95
        self.eps_clip = 0.2
        self.entropy_coef = 0.001
        self.value_loss_coef = 0.5

        self.summary_freq = 1000

        # Environment (multi-agent, time scale 100)
        self.env_name = "Environments/env1/Unity Environment"
        engine_channel = EngineConfigurationChannel()
        env_options = dict(worker_id=0,
                           use_visual=False,
                           side_channels=[engine_channel],
                           no_graphics=False,
                           multiagent=True)
        self.env = UnityEnv(self.env_name, **env_options)
        engine_channel.set_configuration_parameters(time_scale=100)
        sizes = Utils.getActionStateSize(self.env)
        self.action_size, self.state_size = sizes
        self.n_agents = self.env.number_agents
        print("Nº of Agents: ", self.n_agents)

        # Model, optimizer and loss
        network = ActorCritic(self.state_size, self.action_size, seed=0)
        self.model = network.to(device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate,
                                    betas=self.betas)
        self.MseLoss = nn.MSELoss()

        # Buffer memory: one buffer per agent
        self.memory = [Buffer() for _ in range(self.n_agents)]

        # Time step counter (for updating when buffer_size is full)
        self.t_step = 1
示例#18
0
    def __init__(self):
        """Set hyperparameters, open the single-agent env and build Q-nets."""

        # Hyperparameters: learning
        self.learning_rate = 0.0003
        self.gamma = 0.99
        self.tau = 0.01
        self.update_every = 64
        self.max_steps = 100000

        # Hyperparameters: replay
        self.buffer_size = 10240
        self.batch_size = 1024

        # Hyperparameters: epsilon schedule
        self.epsilon = 1.0
        self.epsilon_end = 0.01
        self.epsilon_decay = 0.995

        self.summary_freq = 1000

        # Environment (single-agent, time scale 100)
        self.env_name = "Environments/env1/Unity Environment"
        engine_channel = EngineConfigurationChannel()
        env_options = dict(worker_id=0,
                           use_visual=False,
                           side_channels=[engine_channel],
                           no_graphics=False,
                           multiagent=False)
        self.env = UnityEnv(self.env_name, **env_options)
        engine_channel.set_configuration_parameters(time_scale=100)
        sizes = Utils.getActionStateSize(self.env)
        self.action_size, self.state_size = sizes
        self.n_agents = self.env.number_agents

        # Models: local network first, then the target network
        local_net = QNetwork(self.state_size, self.action_size, seed=0)
        self.local_model = local_net.to(device)
        target_net = QNetwork(self.state_size, self.action_size, seed=0)
        self.target_model = target_net.to(device)
        self.optimizer = optim.Adam(self.local_model.parameters(),
                                    lr=self.learning_rate)

        # Buffer memory
        self.memory = Buffer(self.buffer_size,
                             self.batch_size,
                             seed=0,
                             device=device)

        # Time step counter (for updating every "update_every" time steps)
        self.t_step = 0
示例#19
0
文件: wrappers.py 项目: zhijie-ai/RLs
 def initialize_all_side_channels(self, kwargs):
     '''
     Initialize all communication channels.

     ``kwargs`` is a plain mapping. A truthy ``inference`` entry forces
     the time scale to 1; otherwise ``kwargs['time_scale']`` is used.
     Optional ``initialize_config`` entries are sent as float parameters.
     Returns a dict holding both channels.
     '''
     # Lookups are kept in the original order; time_scale is only read
     # from kwargs when not in inference mode.
     engine_settings = {
         'width': kwargs['width'],
         'height': kwargs['height'],
         'quality_level': kwargs['quality_level'],
         'time_scale': (1 if bool(kwargs.get('inference', False))
                        else kwargs['time_scale']),
         'target_frame_rate': kwargs['target_frame_rate'],
         'capture_frame_rate': kwargs['capture_frame_rate'],
     }
     engine_channel = EngineConfigurationChannel()
     engine_channel.set_configuration_parameters(**engine_settings)

     params_channel = EnvironmentParametersChannel()
     initial_values = kwargs.get('initialize_config', {})
     for name, value in initial_values.items():
         params_channel.set_float_parameter(name, value)

     return {
         'engine_configuration_channel': engine_channel,
         'float_properties_channel': params_channel,
     }
示例#20
0
def play():
    """Run the saved DQN model on the PushBlock Unity build forever.

    The model is restored by calling ``deepq.learn`` with zero training
    timesteps and ``load_path="./model"``. The loop never terminates by
    itself, so the Unity environment is closed in a ``finally`` clause:
    an interrupt or an error then still shuts the Unity process down and
    frees its port.
    """
    engine_configuration_channel = EngineConfigurationChannel()
    # Set the time scale to 10x.
    engine_configuration_channel.set_configuration_parameters(time_scale=10.0)
    unity_env = UnityEnvironment("./ml-agents/Project/PushBlock",
                                 side_channels=[engine_configuration_channel])
    try:
        env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)
        # Load the model.
        model = deepq.learn(env, "mlp", total_timesteps=0, load_path="./model")

        obs = env.reset()
        # The model is fed a leading batch axis of size one.
        obs = np.expand_dims(np.array(obs), axis=0)

        while True:
            action, _, _, _ = model.step(tf.constant(obs))
            action = action[0].numpy()
            obs, rew, done, _ = env.step(action)
            if done:
                obs = env.reset()
            obs = np.expand_dims(np.array(obs), axis=0)
    finally:
        unity_env.close()
示例#21
0
    def __init__(self):
        """Connect to Unity, set the time scale to 1 and cache the shapes."""
        engine_channel = EngineConfigurationChannel()
        self.engine_configuration_channel = engine_channel
        self.env = UnityEnvironment(side_channels=[engine_channel])
        # Only time_scale is sent; width, height, quality_level,
        # target_frame_rate and capture_frame_rate are not overridden here.
        engine_channel.set_configuration_parameters(time_scale=1)

        self.reset()

        self.n = self.agent_num()

        # One entry per behavior name, in the order the environment reports.
        self.state_shapes = [
            self.env.get_behavior_spec(name).observation_shapes[0][0]
            for name in self.env.get_behavior_names()
        ]
        self.action_dims = [
            self.env.get_behavior_spec(name).action_shape
            for name in self.env.get_behavior_names()
        ]
示例#22
0
    def initialise_environment(self):
        """Start the Unity environment, reset it and pick the first group.

        Reads ``self.env_path`` and ``self.time_scale``. Sets ``self.env``,
        ``self.float_properties_channel``, ``self.group_name`` and
        ``self.group_spec``.
        """
        engine_channel = EngineConfigurationChannel()
        self.float_properties_channel = FloatPropertiesChannel()
        channels = [engine_channel, self.float_properties_channel]
        self.env = UnityEnvironment(file_name=self.env_path,
                                    base_port=5004,
                                    side_channels=channels)

        # Reset the environment
        self.env.reset()

        # The first agent group is the default brain to work with
        default_group = self.env.get_agent_groups()[0]
        self.group_name = default_group
        self.group_spec = self.env.get_agent_group_spec(default_group)

        # Set the time scale of the engine
        engine_channel.set_configuration_parameters(time_scale=self.time_scale)
示例#23
0
def test_set_action_multi_agent():
    """Run 3 episodes of 50 steps, sending all-ones actions to every agent."""
    channel = EngineConfigurationChannel()
    make_options = dict(
        base_port=6001,
        worker_id=0,
        no_graphics=True,
        side_channels=[channel],
    )
    env = default_registry[BALL_ID].make(**make_options)
    channel.set_configuration_parameters(time_scale=100)
    for _episode in range(3):
        env.reset()
        behavior_name = list(env.behavior_specs.keys())[0]
        decision_steps, _terminal_steps = env.get_steps(behavior_name)
        for _step in range(50):
            # One row of two continuous values per agent awaiting a decision.
            ones = np.ones((len(decision_steps), 2))
            actions = ActionTuple()
            actions.add_continuous(ones)
            env.set_actions(behavior_name, actions)
            env.step()
            decision_steps, _terminal_steps = env.get_steps(behavior_name)
    env.close()
示例#24
0
    def create_engine_config_side_channel(self) -> EngineConfigurationChannel:
        """Return an engine channel loaded with the play or train settings.

        The ``play`` variant of each setting is used when ``self.play`` or
        ``self.inference`` is truthy, the ``train`` variant otherwise.
        """
        mode = "play" if (self.play or self.inference) else "train"
        engine_configuration = EngineConfig(
            width=getattr(self.WINDOW_WIDTH, mode),
            height=getattr(self.WINDOW_HEIGHT, mode),
            quality_level=getattr(self.QUALITY_LEVEL, mode),
            time_scale=getattr(self.TIMESCALE, mode),
            target_frame_rate=getattr(self.TARGET_FRAME_RATE, mode),
        )
        channel = EngineConfigurationChannel()
        channel.set_configuration(engine_configuration)
        return channel
示例#25
0
 def __init__(self):
     """Set hyperparameters, open the multi-agent env and build the models."""

     # Hyperparameters
     self.learning_rate = 0.0003
     self.batch_size = 256
     self.max_steps = 100000

     self.gamma = 0.99
     self.tau = 0.95
     self.entropy_coef = 0.001
     self.value_loss_coef = 0.5

     self.summary_freq = 1000

     # Environment (multi-agent, time scale 100)
     self.env_name = "Environments/env1/Unity Environment"
     engine_channel = EngineConfigurationChannel()
     env_options = dict(worker_id=0,
                        use_visual=False,
                        side_channels=[engine_channel],
                        no_graphics=False,
                        multiagent=True)
     self.env = UnityEnv(self.env_name, **env_options)
     engine_channel.set_configuration_parameters(time_scale=100)
     sizes = Utils.getActionStateSize(self.env)
     self.action_size, self.state_size = sizes
     self.n_agents = self.env.number_agents
     print("Nº of Agents: ", self.n_agents)

     # Shared model
     shared = ActorCritic(self.state_size, self.action_size, seed=0)
     self.shared_model = shared.to(device)

     # Agent models: one network and one Adam optimizer per agent
     self.agent_model = []
     self.optimizer = []
     for _ in range(self.n_agents):
         network = ActorCritic(self.state_size, self.action_size,
                               seed=0).to(device)
         self.agent_model.append(network)
         self.optimizer.append(
             optim.Adam(network.parameters(), lr=self.learning_rate))

     # Buffer memory: one buffer per agent
     self.memory = [Buffer() for _ in range(self.n_agents)]

     # Time step counter (for updating every "batch_size" time steps)
     self.t_step = 1
示例#26
0
 def __init__(self, env_args):
     """Configure the side channels and start (or attach to) Unity.

     ``env_args`` is a mapping. ``file_path`` set to None selects a
     connection on base port 5004 with no binary. Otherwise the binary at
     ``file_path`` is launched with ``--scene`` and ``--n_agents``
     arguments.
     """
     # Training only overrides the time scale; inference also sets the
     # window size, quality level and target frame rate.
     if env_args['train_mode']:
         engine_settings = {'time_scale': env_args['train_time_scale']}
     else:
         engine_settings = {
             'width': env_args['width'],
             'height': env_args['height'],
             'quality_level': env_args['quality_level'],
             'time_scale': env_args['inference_time_scale'],
             'target_frame_rate': env_args['target_frame_rate'],
         }
     self.engine_configuration_channel = EngineConfigurationChannel()
     self.engine_configuration_channel.set_configuration_parameters(
         **engine_settings)
     self.float_properties_channel = EnvironmentParametersChannel()

     if env_args['file_path'] is None:
         launch_options = {
             'base_port': 5004,
             'seed': env_args['env_seed'],
             'side_channels': [
                 self.engine_configuration_channel,
                 self.float_properties_channel,
             ],
         }
     else:
         # Scene names are looked up in a yaml file under the working dir.
         unity_env_dict = load_yaml('/'.join(
             [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
         launch_options = {
             'file_name': env_args['file_path'],
             'base_port': env_args['port'],
             'no_graphics': not env_args['render'],
             'seed': env_args['env_seed'],
             'side_channels': [
                 self.engine_configuration_channel,
                 self.float_properties_channel,
             ],
             'additional_args': [
                 '--scene',
                 str(unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                        'None')),
                 '--n_agents',
                 str(env_args.get('env_num', 1)),
             ],
         }
     self._env = UnityEnvironment(**launch_options)
     self.reset_config = env_args['reset_config']
示例#27
0
def unity_env_fn(agent_file, time_scale, no_graphics, worker_id):
    """Build a gym-wrapped Unity environment with custom speed and graphics.

    Args:
        agent_file (str): path to the environment binary
        time_scale (float): speed at which to run the simulation
        no_graphics (bool): passed to ``UnityEnvironment`` as ``no_graphics``
        worker_id: passed to ``UnityEnvironment`` as ``worker_id``

    Returns:
        Gym environment.
    """
    engine_channel = EngineConfigurationChannel()
    launch_options = {
        'file_name': agent_file,
        'no_graphics': no_graphics,
        'side_channels': [engine_channel],
        'worker_id': worker_id,
    }
    simulation = UnityEnvironment(**launch_options)
    # The time scale is sent through the side channel once the
    # environment object exists.
    engine_channel.set_configuration_parameters(time_scale=time_scale)
    return UnityToGymWrapper(simulation)
示例#28
0
def main(params):
    """Train a DDQN agent on a Unity environment wrapped as a gym env.

    Args:
        params: not read anywhere in this function body.
    """
    # NOTE(review): `parser`, `cfg` and `seed` are not defined in this
    # function; presumably they are module-level names - confirm.
    cli_options = vars(parser.parse_args())

    # No executable path is given to UnityEnvironment (file_name=None).
    engine_channel = EngineConfigurationChannel()
    simulation = UnityEnvironment(file_name=None,
                                  side_channels=[engine_channel])
    engine_channel.set_configuration_parameters(time_scale=20.0)

    gym_env = UnityToGymWrapper(simulation)

    learner = DDQN(gym_env, cfg['agent'])
    run_tag = 'DDQN'

    # Stats tracker recording the epoch and the episode reward
    columns = ['Epoch', 'Ep_Reward']
    stats = Tracker("TurtleBot3", run_tag, seed, cfg['agent'], columns)

    # Run training with the parsed command-line options
    learner.train(
        stats,
        n_episodes=cli_options['epochs'],
        verbose=cli_options['verbose'],
        params=cfg['agent'],
        hyperp=cli_options,
    )
示例#29
0
    def __init__(self, config=DEFAULT_ENV_CONFIG):
        """
        Set up the side channels and delegate to the parent constructor.
        :param config: Configuration of the environment.
        """

        # one side channel of each kind used by this environment
        params_channel = EnvironmentParametersChannel()
        engine_channel = EngineConfigurationChannel()
        colors_channel = IntListPropertiesChannel()

        # keep the channels reachable from the instance
        self.env_param_channel = params_channel
        self.engine_channel = engine_channel
        self.color_pool_channel = colors_channel

        # flag whether the config has been applied to the environment
        self.is_already_initialized = False

        # the parent constructor receives the config, the defaults and
        # the channels
        super().__init__(
            config,
            DEFAULT_ENV_CONFIG,
            side_channels=[params_channel, engine_channel, colors_channel],
        )
    def __init__(self,
                 train_mode=True,
                 file_name=None,
                 base_port=5005,
                 seed=None,
                 scene=None,
                 n_agents=1):

        seed = seed if seed is not None else np.random.randint(0, 65536)

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self._env = UnityEnvironment(
            file_name=file_name,
            base_port=base_port,
            seed=seed,
            args=['--scene', scene, '--n_agents',
                  str(n_agents)],
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        if train_mode:
            self.engine_configuration_channel.set_configuration_parameters(
                width=200, height=200, quality_level=0, time_scale=100)
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=1028,
                height=720,
                quality_level=5,
                time_scale=5,
                target_frame_rate=60)

        self._env.reset()
        self.bahavior_name = self._env.get_behavior_names()[0]