Example #1
    def __init__(self, environment_filename, no_graphics):
        engine_configuration_channel = EngineConfigurationChannel()
        self.env = UnityEnvironment(
            file_name=environment_filename,
            side_channels=[engine_configuration_channel],
            no_graphics=no_graphics)
        self.env.reset()

        self.brain_name = self.env.get_agent_groups()
        self.group_spec = self.env.get_agent_group_spec(self.brain_name[0])
        engine_configuration_channel.set_configuration_parameters(
            width=640, height=480, time_scale=3.0)
        self.group_name = self.brain_name

        # Set observation and action spaces
        if self.group_spec.is_action_discrete():
            self._action_space = []
            branches = self.group_spec.discrete_action_branches
            # if self.group_spec.action_shape == 1:
            for _ in range(2):
                self._action_space.append([
                    spaces.Discrete(branches[i]) for i in range(len(branches))
                ])
        else:
            high = np.array([1] * self.group_spec.action_shape)
            self._action_space = spaces.Box(-high, high, dtype=np.float32)

        high = np.array([np.inf] * self._get_vec_obs_size())

        self._observation_space = spaces.Box(-high, high, dtype=np.float32)
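The snippet above calls a _get_vec_obs_size helper that is not shown; a minimal sketch under the same legacy API, assuming vector observations are the 1-D entries of group_spec.observation_shapes (hypothetical helper, not the original implementation):

    def _get_vec_obs_size(self):
        # Sum the lengths of all 1-D (vector) observations declared by the group spec
        size = 0
        for shape in self.group_spec.observation_shapes:
            if len(shape) == 1:
                size += shape[0]
        return size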
Example #2
File: learner.py Project: jun-hyeok/test
    def initialize_env(self, config, env_file) -> Environment:
        """
        Initialize the environment.

        Args:
            config: the configuration parameters.
            env_file: the environment file.
        Returns:
            env: Environment
        """
        # [3] Environment configuration
        base_port = int(input("Enter base port: "))
        time_scale = int(config.get("time_scale"))
        width = int(config.get("width"))
        height = int(config.get("height"))

        channel_config = EngineConfigurationChannel()
        channel_param = EnvironmentParametersChannel()

        env = Environment(
            file_name=env_file,
            base_port=base_port,
            side_channels=[channel_config, channel_param],
        )

        channel_config.set_configuration_parameters(time_scale=time_scale,
                                                    quality_level=1,
                                                    width=width,
                                                    height=height)

        env.set_float_parameters(config)
        return env
Example #3
def train():
    engine_configuration_channel = EngineConfigurationChannel()
    # Set the time scale to 20x
    engine_configuration_channel.set_configuration_parameters(time_scale=20.0)
    unity_env = UnityEnvironment("./ml-agents/Project/PushBlock", side_channels=[engine_configuration_channel])
    env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)
    logger.configure('./logs')
    # Train with DQN
    model = deepq.learn(
        env,
        "mlp",
        seed=0,
        lr=2.5e-4,
        total_timesteps=400000,
        buffer_size=50000,
        exploration_fraction=0.05,
        exploration_final_eps=0.1,
        print_freq=20,
        train_freq=5,
        learning_starts=20000,
        target_network_update_freq=50,
        gamma=0.99,
        prioritized_replay=False,
        checkpoint_freq=1000,
        dueling=True,
        checkpoint_path=None,
        load_path="./model"
    )

    # Save the model
    save_path = "./model"
    ckpt = tf.train.Checkpoint(model=model)
    manager = tf.train.CheckpointManager(ckpt, save_path, max_to_keep=1)
    manager.save()
Example #4
    def __init__(self,
                 unity_env,
                 time_scale=1.0,
                 width=720,
                 height=480,
                 target_frame_rate=60,
                 quality_level=5):
        """ Initializes the game

        :param unity_env: (UnityEnvironment) Environment where the game will be played
        :param time_scale:(float) Speed of the game
        :param width:(int) Window's width
        :param height:(int) Window's height
        :param target_frame_rate:(int) Frame rate
        :param quality_level:(int) Visual quality

        Todo: Commentate a little, reorganise
        """
        self.unity_env = unity_env
        self.unity_env.reset()
        engine_configuration_channel = EngineConfigurationChannel()
        engine_configuration_channel.set_configuration_parameters(
            time_scale=time_scale,
            width=width,
            height=height,
            target_frame_rate=target_frame_rate,
            quality_level=quality_level)
        self.unity_env.side_channels[2] = engine_configuration_channel

        self.group_name = unity_env.get_agent_groups()[0]
        self.group_spec = unity_env.get_agent_group_spec(self.group_name)
        self.n_agents = self.unity_env.get_step_result(
            self.group_name).n_agents()
        self.action_size = self.group_spec.action_size
Example #5
    def make_unity_env(self,
                       env_name,
                       float_params=dict(),
                       time_scale=1,
                       seed=int(time.time()),
                       worker_id=None,
                       **kwargs):
        """
        Creates a gym environment from a Unity game.

        env_name: str
            the path to the game
        float_params: dict or None
            this should be a dict of argument settings for the unity
            environment
            keys: varies by environment
        time_scale: float
            argument to set Unity's time scale. This likely matters less for
            gym-wrapped versions of Unity environments.
        seed: int
            the seed for randomness
        worker_id: int
            must specify a unique worker id for each unity process
            on this machine
        """
        if float_params is None: float_params = dict()
        path = os.path.expanduser(env_name)
        channel = EngineConfigurationChannel()
        env_channel = EnvironmentParametersChannel()
        channel.set_configuration_parameters(time_scale=time_scale)
        for k, v in float_params.items():
            if k == "validation" and v >= 1:
                print("Game in validation mode")
            env_channel.set_float_parameter(k, float(v))
        if worker_id is None: worker_id = seed % 500 + 1
        env_made = False
        n_loops = 0
        while not env_made and n_loops < 50:
            try:
                env = UnityEnvironment(file_name=path,
                                       side_channels=[channel, env_channel],
                                       worker_id=worker_id,
                                       seed=seed)
                env_made = True
            except:
                s = "Error encountered making environment, "
                s += "trying new worker_id"
                print(s)
                worker_id = (worker_id + 1 +
                             int(np.random.random() * 100)) % 500
                try:
                    env.close()
                except:
                    pass
                n_loops += 1
        env = UnityToGymWrapper(env, allow_multiple_obs=True)
        return env
Example #6
class FQ_Env(object):
    def __init__(self):
        self.engine_configuration_channel = EngineConfigurationChannel()
        self.env = UnityEnvironment(side_channels=[self.engine_configuration_channel])
        self.engine_configuration_channel.set_configuration_parameters(
            # width = 84,
            # height = 84,
            # quality_level = 5, #1-5
            time_scale = 1  # 1-100
            # target_frame_rate = 60, #1-60
            # capture_frame_rate = 60 #default 60
        )

        self.reset()

        self.n = self.agent_num()

        self.state_shapes = [self.env.get_behavior_spec(behavior_name).observation_shapes[0][0] for behavior_name in
                             self.env.get_behavior_names()]
        self.action_dims = [self.env.get_behavior_spec(behavior_name).action_shape for behavior_name in
                             self.env.get_behavior_names()]
    def agent_num(self):
        behavior_names = self.env.get_behavior_names()
        agent_num = len(behavior_names)
        return agent_num

    def reset(self):
        self.env.reset()
        cur_state = []
        for behavior_name in self.env.get_behavior_names():
            DecisionSteps, TerminalSteps = self.env.get_steps(behavior_name)
            cur_state.append(DecisionSteps.obs[0][0])
        return cur_state

    def step(self, actions):
        next_state = []
        reward = []
        done = []
        for behavior_name_index, behavior_name in enumerate(self.env.get_behavior_names()):
            self.env.set_actions(behavior_name=behavior_name, action=np.asarray([actions[behavior_name_index]]))
        self.env.step()

        for behavior_name in self.env.get_behavior_names():
            DecisionSteps, TerminalSteps = self.env.get_steps(behavior_name)
            if len(TerminalSteps.reward) == 0:
                next_state.append(DecisionSteps.obs[0][0])
                reward.append(DecisionSteps.reward[0])
                done.append(False)
            else:
                next_state.append(TerminalSteps.obs[0][0])
                reward.append(TerminalSteps.reward[0])
                done.append(True)

        return next_state, reward, done

    def close(self):
        self.env.close()
Example #7
File: main.py Project: rapsealk/Mummy
def main():
    """
    file_name: is the name of the environment binary (located in the root directory of the python project)
    worker_id: indicates which port to use for communication with the environment. For use in parallel training regimes such as A3C.
    seed: indicates the seed to use when generating random numbers during the training process.
          In environments which are deterministic, setting the seed enables reproducible experimentation by ensuring that the environment and trainers utilize the same random seed.
    side_channels: provides a way to exchange data with the Unity simulation that is not related to the reinforcement learning loop.
                   For example: configurations or properties.
                   More on them in the "Modifying the environment from Python"(https://github.com/Unity-Technologies/ml-agents/blob/master/docs/Python-API.md#modifying-the-environment-from-python) section.
    ---
    env.reset()
    env.step()
    env.close()
    """
    channel = EngineConfigurationChannel()

    filename = "Mummy"
    env = UnityEnvironment(file_name=filename, seed=1, side_channels=[channel])

    channel.set_configuration_parameters(time_scale=2.0)

    env.reset()
    behavior_names = env.behavior_specs.keys()
    for name in behavior_names:
        print('behavior_name:', name)  # Mummy?team=0

    decision_steps, terminal_steps = env.get_steps(
        behavior_name="Mummy?team=0")
    """
    print('DecisionSteps')
    print('- observation:', decision_steps.obs)
    print('- reward:', decision_steps.reward)
    print('- agent_id:', decision_steps.agent_id)
    print('- action_mask:', decision_steps.action_mask)

    print('TerminalSteps')
    print('- observation:', terminal_steps.obs)
    print('- reward:', terminal_steps.reward)
    print('- agent_id:', terminal_steps.agent_id)
    print('- interrupted:', terminal_steps.interrupted)
    """

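    # Drive the agents with random continuous actions; agents that just terminated are skipped this step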
    while True:
        for i in decision_steps.agent_id:
            if i in terminal_steps.agent_id:
                continue
            env.set_action_for_agent(behavior_name="Mummy?team=0",
                                     agent_id=i,
                                     action=np.random.uniform(-1.0,
                                                              1.0,
                                                              size=(2, )))
        env.step()

        decision_steps, terminal_steps = env.get_steps(
            behavior_name="Mummy?team=0")
Example #8
class UnityWrapper(object):
    def __init__(self, env_args):
        self.engine_configuration_channel = EngineConfigurationChannel()
        if env_args['train_mode']:
            self.engine_configuration_channel.set_configuration_parameters(
                time_scale=env_args['train_time_scale'])
        else:
            self.engine_configuration_channel.set_configuration_parameters(
                width=env_args['width'],
                height=env_args['height'],
                quality_level=env_args['quality_level'],
                time_scale=env_args['inference_time_scale'],
                target_frame_rate=env_args['target_frame_rate'])
        self.float_properties_channel = EnvironmentParametersChannel()
        if env_args['file_path'] is None:
            self._env = UnityEnvironment(base_port=5004,
                                         seed=env_args['env_seed'],
                                         side_channels=[
                                             self.engine_configuration_channel,
                                             self.float_properties_channel
                                         ])
        else:
            unity_env_dict = load_yaml('/'.join(
                [os.getcwd(), 'rls', 'envs', 'unity_env_dict.yaml']))
            self._env = UnityEnvironment(
                file_name=env_args['file_path'],
                base_port=env_args['port'],
                no_graphics=not env_args['render'],
                seed=env_args['env_seed'],
                side_channels=[
                    self.engine_configuration_channel,
                    self.float_properties_channel
                ],
                additional_args=[
                    '--scene',
                    str(
                        unity_env_dict.get(env_args.get('env_name', 'Roller'),
                                           'None')), '--n_agents',
                    str(env_args.get('env_num', 1))
                ])
        self.reset_config = env_args['reset_config']

    def reset(self, **kwargs):
        reset_config = kwargs.get('reset_config', None) or self.reset_config
        for k, v in reset_config.items():
            self.float_properties_channel.set_float_parameter(k, v)
        self._env.reset()

    def __getattr__(self, name):
        if name.startswith('_'):
            raise AttributeError(
                "attempted to get missing private attribute '{}'".format(name))
        return getattr(self._env, name)
Example #9
 def _create_env(self, env_file, time_scale, no_graphics):
     channel = EngineConfigurationChannel()
     env = UnityEnvironment(
         file_name=env_file,
         no_graphics=no_graphics,
         side_channels=[channel],
         # See if setting a worker id allows me to spin up more agents
         worker_id=proc_id(),
     )
     channel.set_configuration_parameters(
         time_scale=time_scale,
     )
     return env
Example #10
 def initialize_all_side_channels(self, initialize_config, engine_config):
     """
     Initialize all side channels.
     """
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         **engine_config)
     float_properties_channel = EnvironmentParametersChannel()
     float_properties_channel.set_float_parameter('env_copies',
                                                  self._n_copies)
     for k, v in initialize_config.items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
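A minimal sketch of wiring the returned channels into a UnityEnvironment, assuming a hypothetical build path and that the method is called on the same wrapper object:

channels = self.initialize_all_side_channels(initialize_config, engine_config)
env = UnityEnvironment(
    file_name='path/to/build',                 # hypothetical path
    side_channels=list(channels.values()))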
Example #11
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.betas = (0.9, 0.999)
        self.gamma = 0.99
        self.eps_clip = 0.2
        self.buffer_size = 2048
        self.batch_size = 256
        self.K_epochs = 3
        self.max_steps = 100000

        self.tau = 0.95
        self.entropy_coef = 0.001
        self.value_loss_coef = 0.5

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=True)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents
        print("Nº of Agents: ", self.n_agents)

        # Model
        self.model = ActorCritic(self.state_size, self.action_size,
                                 seed=0).to(device)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate,
                                    betas=self.betas)
        self.MseLoss = nn.MSELoss()

        # Buffer memory
        self.memory = []
        for _ in range(self.n_agents):
            self.memory.append(Buffer())

        # Initialize time step (for updating when buffer_size is full)
        self.t_step = 1
Example #12
    def __init__(self):

        # Hyperparameters
        self.learning_rate = 0.0003
        self.buffer_size = 10240
        self.batch_size = 1024
        self.gamma = 0.99
        self.update_every = 64
        self.max_steps = 100000

        self.epsilon = 1.0
        self.epsilon_end = 0.01
        self.epsilon_decay = 0.995
        self.tau = 0.01

        self.summary_freq = 1000

        # Environment
        self.env_name = "Environments/env1/Unity Environment"
        channel = EngineConfigurationChannel()
        self.env = UnityEnv(self.env_name,
                            worker_id=0,
                            use_visual=False,
                            side_channels=[channel],
                            no_graphics=False,
                            multiagent=False)
        channel.set_configuration_parameters(time_scale=100)
        self.action_size, self.state_size = Utils.getActionStateSize(self.env)
        self.n_agents = self.env.number_agents

        # Models
        self.local_model = QNetwork(self.state_size, self.action_size,
                                    seed=0).to(device)
        self.target_model = QNetwork(self.state_size, self.action_size,
                                     seed=0).to(device)
        self.optimizer = optim.Adam(self.local_model.parameters(),
                                    lr=self.learning_rate)

        # Buffer memory
        self.memory = Buffer(self.buffer_size,
                             self.batch_size,
                             seed=0,
                             device=device)

        # Initialize time step (for updating every "update_every" time steps)
        self.t_step = 0
Example #13
File: wrappers.py Project: zhijie-ai/RLs
 def initialize_all_side_channels(self, kwargs):
     '''
     Initialize all side channels.
     '''
     engine_configuration_channel = EngineConfigurationChannel()
     engine_configuration_channel.set_configuration_parameters(
         width=kwargs['width'],
         height=kwargs['height'],
         quality_level=kwargs['quality_level'],
         time_scale=1
         if bool(kwargs.get('inference', False)) else kwargs['time_scale'],
         target_frame_rate=kwargs['target_frame_rate'],
         capture_frame_rate=kwargs['capture_frame_rate'])
     float_properties_channel = EnvironmentParametersChannel()
     for k, v in kwargs.get('initialize_config', {}).items():
         float_properties_channel.set_float_parameter(k, v)
     return dict(engine_configuration_channel=engine_configuration_channel,
                 float_properties_channel=float_properties_channel)
Example #14
def play():
    engine_configuration_channel = EngineConfigurationChannel()
    # Set the time scale to 10x
    engine_configuration_channel.set_configuration_parameters(time_scale=10.0)
    unity_env = UnityEnvironment("./ml-agents/Project/PushBlock",
                                 side_channels=[engine_configuration_channel])
    env = UnityToGymWrapper(unity_env, 0, flatten_branched=True)
    # Load the model
    model = deepq.learn(env, "mlp", total_timesteps=0, load_path="./model")

    obs = env.reset()
    obs = np.expand_dims(np.array(obs), axis=0)

    while True:
        action, _, _, _ = model.step(tf.constant(obs))
        action = action[0].numpy()
        obs, rew, done, _ = env.step(action)
        if done:
            obs = env.reset()
        obs = np.expand_dims(np.array(obs), axis=0)
Example #15
def test_set_action_multi_agent():
    engine_config_channel = EngineConfigurationChannel()
    env = default_registry[BALL_ID].make(
        base_port=6001,
        worker_id=0,
        no_graphics=True,
        side_channels=[engine_config_channel],
    )
    engine_config_channel.set_configuration_parameters(time_scale=100)
    for _ in range(3):
        env.reset()
        behavior_name = list(env.behavior_specs.keys())[0]
        d, t = env.get_steps(behavior_name)
        for _ in range(50):
            action = np.ones((len(d), 2))
            action_tuple = ActionTuple()
            action_tuple.add_continuous(action)
            env.set_actions(behavior_name, action_tuple)
            env.step()
            d, t = env.get_steps(behavior_name)
    env.close()
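For behaviors that use discrete branches instead of continuous actions, the analogous call is ActionTuple.add_discrete; a minimal sketch, assuming a behavior with two discrete branches:

d_action = np.zeros((len(d), 2), dtype=np.int32)   # one action index per branch
action_tuple = ActionTuple()
action_tuple.add_discrete(d_action)
env.set_actions(behavior_name, action_tuple)
env.step()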
Example #16
    def initialise_environment(self):
        """Initialise and reset unity environment"""
        engine_configuration_channel = EngineConfigurationChannel()
        self.float_properties_channel = FloatPropertiesChannel()
        self.env = UnityEnvironment(file_name=self.env_path,
                                    base_port=5004,
                                    side_channels=[
                                        engine_configuration_channel,
                                        self.float_properties_channel
                                    ])

        # Reset the environment
        self.env.reset()

        # Set the default brain to work with
        self.group_name = self.env.get_agent_groups()[0]
        self.group_spec = self.env.get_agent_group_spec(self.group_name)

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(
            time_scale=self.time_scale)
Example #17
def test_engine_configuration():
    sender = EngineConfigurationChannel()
    # We use a raw bytes channel to interpret the data
    receiver = RawBytesChannel(sender.channel_id)

    config = EngineConfig.default_config()
    sender.set_configuration(config)
    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    received_data = receiver.get_and_clear_received_messages()
    assert len(received_data) == 5  # 5 different messages one for each setting

    sent_time_scale = 4.5
    sender.set_configuration_parameters(time_scale=sent_time_scale)

    data = SideChannelManager([sender]).generate_side_channel_messages()
    SideChannelManager([receiver]).process_side_channel_message(data)

    message = IncomingMessage(receiver.get_and_clear_received_messages()[0])
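    # The leading int32 identifies which engine setting the message carries (here, time_scale)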
    message.read_int32()
    time_scale = message.read_float32()
    assert time_scale == sent_time_scale

    with pytest.raises(UnitySideChannelException):
        sender.set_configuration_parameters(width=None, height=42)

    with pytest.raises(UnityCommunicationException):
        # try to send data to the EngineConfigurationChannel
        sender.set_configuration_parameters(time_scale=sent_time_scale)
        data = SideChannelManager([sender]).generate_side_channel_messages()
        SideChannelManager([sender]).process_side_channel_message(data)
Example #18
 def __init__(self):
    
     # Hyperparameters
     self.learning_rate = 0.0003
     self.gamma = 0.99
     self.batch_size = 256
     self.max_steps = 100000
     
     self.tau = 0.95
     self.entropy_coef = 0.001
     self.value_loss_coef = 0.5
     
     self.summary_freq = 1000
     
     # Environment
     self.env_name = "Environments/env1/Unity Environment"
     channel = EngineConfigurationChannel()
     self.env = UnityEnv(self.env_name, worker_id=0, use_visual=False, side_channels=[channel], no_graphics = False, multiagent = True)
     channel.set_configuration_parameters(time_scale = 100)
     self.action_size, self.state_size  = Utils.getActionStateSize(self.env)
     self.n_agents = self.env.number_agents
     print("Nº of Agents: ",self.n_agents)
     
     # Shared model
     self.shared_model = ActorCritic(self.state_size, self.action_size, seed = 0).to(device)
     
     # Agents models
     self.agent_model = []
     self.optimizer = []
     for i in range(self.n_agents):
         self.agent_model.append(ActorCritic(self.state_size, self.action_size, seed = 0).to(device))
         self.optimizer.append(optim.Adam(self.agent_model[i].parameters(), lr=self.learning_rate))
     
     # Buffer memory
     self.memory = []
     for _ in range(self.n_agents):
         self.memory.append(Buffer())
         
     # Initialize time step (for updating every "batch_size" time steps)
     self.t_step = 1
Example #19
def main(params):
    config = vars(parser.parse_args())

    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=None, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)

    env = UnityToGymWrapper(unity_env)

    agent = DDQN(env, cfg['agent'])
    tag = 'DDQN'

    # Initiate the tracker for stats
    tracker = Tracker("TurtleBot3", tag, seed, cfg['agent'],
                      ['Epoch', 'Ep_Reward'])

    # Train the agent
    agent.train(tracker,
                n_episodes=config['epochs'],
                verbose=config['verbose'],
                params=cfg['agent'],
                hyperp=config)
Example #20
def unity_env_fn(agent_file, time_scale, no_graphics, worker_id):
    """Wrapper function for making unity environment with custom
    speed and graphics options.

    Args:
        agent_file (str): path to the environment binary
        time_scale (float): speed at which to run the simulation
        no_graphics (bool): whether or not to show the simulation

    Returns:
        Gym environment.
    """
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(
        file_name=agent_file,
        no_graphics=no_graphics,
        side_channels=[channel],
        worker_id=worker_id,
    )
    channel.set_configuration_parameters(time_scale=time_scale, )
    env = UnityToGymWrapper(unity_env)
    return env
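A minimal sketch of calling the factory above; the binary path and argument values are assumptions:

env = unity_env_fn("3DBall/3DBall.x86_64", time_scale=20.0,
                   no_graphics=True, worker_id=0)
obs = env.reset()
obs, reward, done, info = env.step(env.action_space.sample())
env.close()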
Example #21
def main():
    agent_file = "3DBall_single/3DBall_single.x86_64"
    no_graphics = True
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(file_name=agent_file,
                                 seed=1,
                                 no_graphics=no_graphics,
                                 side_channels=[channel])
    channel.set_configuration_parameters(time_scale=50., )
    env = UnityToGymWrapper(unity_env)
    l1, l2 = 64, 64
    activation = nn.ReLU
    output_activation = nn.Tanh
    ac = TD3ActorCritic(env.observation_space,
                        env.action_space,
                        l1,
                        l2,
                        activation=activation)

    params = dict(
        gamma=0.99,
        polyak=0.995,
        act_noise=0.1,
        target_noise=0.2,
        epochs=100,
        steps_per_epoch=4000,
        start_steps=10000,
        batch_size=256,
        update_after=10000,
        update_every=50,
        policy_delay=2,
        lr=1e-3,
    )

    model = TD3(ac=ac, env=env, **params)
    model.train()
Example #22
File: my_env.py Project: Maxpridy/RLzoo
class MyEnv(gym.Env):
    def __init__(self, worker_id, realtime_mode=False):
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        env_path = "C:/myDesktop/source/gridworld_imitation/food_collector_4"

        self._env = UnityEnvironment(
            env_path,
            worker_id,
            side_channels=[self.reset_parameters, self.engine_config])
        self._env.reset()

        self.behavior_name = list(self._env.behavior_specs)[0]
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        print(behavior_spec)

        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0)
            self.reset_parameters.set_float_parameter("train-mode", 0.0)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0)
            self.reset_parameters.set_float_parameter("train-mode", 1.0)

        self._flattener = ActionFlattener(
            behavior_spec.action_spec.discrete_branches)

    def reset(self):
        # for key, value in reset_params.items():
        #     self.reset_parameters.set_float_parameter(key, value)
        self._env.reset()
        info, terminal_info = self._env.get_steps(self.behavior_name)
        self.game_over = False

        obs, reward, done, info = self._single_step(info, terminal_info)
        return obs

    def step(self, action):
        # Use random actions for all other agents in environment.
        if self._flattener is not None and type(action) == int:
            # Translate action into list
            action = np.array(self._flattener.lookup_action(action))

        c_action = Action(action)

        self._env.set_actions(self.behavior_name, c_action)
        self._env.step()
        running_info, terminal_info = self._env.get_steps(self.behavior_name)
        obs, reward, done, info = self._single_step(running_info,
                                                    terminal_info)
        self.game_over = done

        return obs, reward, done, info

    def _single_step(self, info, terminal_info):
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Observations arrive in order: camera first, then sensors
        output_info = {}
        output_info["visual_obs"] = use_info.obs[0][0]

        #obs = np.concatenate([use_info.obs[1][0], use_info.obs[2][0]])
        return use_info.obs[1][0], use_info.reward[0], done, output_info

    def close(self):
        self._env.close()

    def render(self):
        pass
Example #23
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, reset_params, worker_id = 1, no_graphis = False, realtime_mode = False,  record_trajectory = False):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
            reset_params {dict} -- Reset parameters of the environment such as the seed
        
        Keyword Arguments:
            worker_id {int} -- Port of the environment"s instance (default: {1})
            no_graphis {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            record_trajectory {bool} -- Whether to record the trajectory of an entire episode. This can be used for video recording. (default: {False})
        """
        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        # Prepare default reset parameters
        self._default_reset_parameters = {}
        for key, value in reset_params.items():
            self._default_reset_parameters[key] = value
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=30.0, width=256, height=256)

        # Whether to record the trajectory of an entire episode
        self._record = record_trajectory

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphis, side_channels=[self.reset_parameters, self.engine_config])
        # If the Unity Editor should be used instead of a build
        # self._env = UnityEnvironment(file_name = None, worker_id = 0, no_graphics = no_graphis, side_channels=[self.reset_parameters, self.engine_config])

        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Check whether this Unity environment is supported
        self._verify_environment()

        # Set action space properties
        if self._behavior_spec.action_spec.is_discrete():
            num_action_branches = self._behavior_spec.action_spec.discrete_size
            action_branch_dimensions = self._behavior_spec.action_spec.discrete_branches
            if num_action_branches == 1:
                self._action_space = spaces.Discrete(action_branch_dimensions[0])
            else:
                self._action_space = spaces.MultiDiscrete(action_branch_dimensions)

        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_specs):
            if len(obs.shape) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Set visual observation space property
        if self._num_vis_obs == 1:
            vis_obs_shape = self._behavior_spec.observation_specs[self._vis_obs_index].shape

            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = vis_obs_shape,
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_specs[i].shape[0] for i in self._vec_obs_indices])
            self._vector_observatoin_space = (vec_obs_length, )
        else:
            self._vector_observatoin_space = None

        # Videos can only be recorded if the environment provides visual observations
        if self._record and self._visual_observation_space is None:
            UnityEnvironmentException("Videos cannot be rendered for a Unity environment that does not provide visual observations.")

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return None

    @property
    def get_episode_trajectory(self):
        """Returns the trajectory of an entire episode as dictionary (vis_obs, vec_obs, rewards, actions). 
        """
        self._trajectory["action_names"] = self.action_names
        return self._trajectory if self._trajectory else None

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observatoin_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Use initial or new reset parameters
        if reset_params is None:
            reset_params = self._default_reset_parameters

        # Apply reset parameters
        for key, value in reset_params.items():
            # Skip reset parameters that are not used by the Unity environment
            if key != "start-seed" or key != "num-seeds":
                self.reset_parameters.set_float_parameter(key, value)

        # Sample the seed to be used
        if reset_params["start-seed"] > -1:
            seed = randint(reset_params["start-seed"], reset_params["start-seed"] + reset_params["num-seeds"] - 1)
        else:
            # Use unlimited seeds
            seed = -1
        self.reset_parameters.set_float_parameter("seed", seed)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment()
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)

        # Prepare trajectory recording
        self._trajectory = {
            "vis_obs": [vis_obs * 255], "vec_obs": [vec_obs],
            "rewards": [0.0], "actions": []
        }

        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        action_tuple = ActionTuple()
        action_tuple.add_discrete(np.asarray(action).reshape([1, -1]))
        self._env.set_actions(self._behavior_name, action_tuple)
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Record trajectory data
        if self._record:
            self._trajectory["vis_obs"].append(vis_obs * 255)
            self._trajectory["vec_obs"].append(vec_obs)
            self._trajectory["rewards"].append(reward)
            self._trajectory["actions"].append(action)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self):
        # Verify number of agent behavior types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify number of agents
        decision_steps, _ = self._env.get_steps(self._behavior_name)
        if len(decision_steps) > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent, which is not supported.")
        # Verify action space type
        if not self._behavior_spec.action_spec.is_discrete() or self._behavior_spec.action_spec.is_continuous():
            raise UnityEnvironmentException("Continuous action spaces are not supported. " 
                                            "Only discrete and MultiDiscrete spaces are supported.")
        # Verify that at least one observation is provided
        num_vis_obs = 0
        num_vec_obs = 0
        for obs_spec in self._behavior_spec.observation_specs:
            if len(obs_spec.shape) == 3:
                num_vis_obs += 1
            elif len(obs_spec.shape) == 1:
                num_vec_obs += 1
        if num_vis_obs == 0 and num_vec_obs == 0:
            raise UnityEnvironmentException("The unity environment does not contain any observations.")
        # Verify number of visual observations
        if num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
Example #24
class UnityWrapper(Env):
    """This class wraps Unity environments.

    This wrapper has notable constraints:
        - Only one agent (no multi-agent environments).
        - Only one visual observation
        - Only discrete and multi-discrete action spaces (no continuous action space)"""

    def __init__(self, env_path, worker_id = 1, no_graphis = False, realtime_mode = False, config = None):
        """Instantiates the Unity Environment from a specified executable.
        
        Arguments:
            env_path {string} -- Path to the executable of the environment
        
        Keyword Arguments:
            worker_id {int} -- Port of the environment"s instance (default: {1})
            no_graphis {bool} -- Whether to allow the executable to render or not (default: {False})
            realtime_mode {bool} -- Whether to run the environment in real time or as fast as possible (default: {False})
            config {dict} -- Specifies the reset parameters of the environment (default: {None})
        """
        # Disable logging
        logging.disable(logging.INFO)

        # Initialize channels
        self.reset_parameters = EnvironmentParametersChannel()
        self.engine_config = EngineConfigurationChannel()

        self._config = config
        self._realtime_mode = realtime_mode
        if realtime_mode:
            self.engine_config.set_configuration_parameters(time_scale=1.0, width=1280, height=720)
        else:
            self.engine_config.set_configuration_parameters(time_scale=20.0, width=128, height=128)

        # Launch the environment's executable
        self._env = UnityEnvironment(file_name = env_path, worker_id = worker_id, no_graphics = no_graphis, side_channels=[self.reset_parameters, self.engine_config])
        # Reset the environment
        self._env.reset()
        # Retrieve behavior configuration
        self._behavior_name = list(self._env.behavior_specs)[0]
        self._behavior_spec = self._env.behavior_specs[self._behavior_name]

        # Set action space properties
        if len(self._behavior_spec.action_shape) == 1:
            self._action_space = spaces.Discrete(self._behavior_spec.action_shape[0])
        else:
            self._action_space = spaces.MultiDiscrete(self._behavior_spec.action_shape)
        self._action_names = ["Not available"]
        
        # Count visual and vector observations
        self._num_vis_obs, self._num_vec_obs = 0, 0
        self._vec_obs_indices = []
        for index, obs in enumerate(self._behavior_spec.observation_shapes):
            if len(obs) > 1:
                self._num_vis_obs = self._num_vis_obs + 1
                self._vis_obs_index = index
            else:
                self._num_vec_obs = self._num_vec_obs + 1
                self._vec_obs_indices.append(index)

        # Verify the environment
        self._verify_environment()

        # Set visual observation space property
        if self._num_vis_obs == 1:
            height = self._behavior_spec.observation_shapes[self._vis_obs_index][0]
            width = self._behavior_spec.observation_shapes[self._vis_obs_index][1]
            depth = self._behavior_spec.observation_shapes[self._vis_obs_index][2]
            self._visual_observation_space = spaces.Box(
                low = 0,
                high = 1.0,
                shape = (height, width, depth),
                dtype = np.float32)
        else:
            self._visual_observation_space = None

        # Set vector observation space property
        if self._num_vec_obs > 0:
            # Determine the length of vec obs by summing the length of each distinct one
            vec_obs_length = sum([self._behavior_spec.observation_shapes[i][0] for i in self._vec_obs_indices])
            self._vector_observatoin_space = (vec_obs_length, )
        else:
            self._vector_observatoin_space = None

    @property
    def unwrapped(self):
        """        
        Returns:
            {UnityWrapper} -- Environment in its vanilla (i.e. unwrapped) state
        """
        return self
    
    @property
    def action_space(self):
        """Returns the shape of the action space of the agent."""
        return self._action_space

    @property
    def action_names(self):
        return self._action_names

    @property
    def visual_observation_space(self):
        return self._visual_observation_space

    @property
    def vector_observation_space(self):
        return self._vector_observatoin_space

    def reset(self, reset_params = None):
        """Resets the environment based on a global or just specified config.
        
        Keyword Arguments:
            reset_params {dict} -- Reset parameters to configure the environment (default: {None})
        
        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
        """
        # Track rewards of an entire episode
        self._rewards = []

        # Process config: Either load global or new config (if specified)
        if reset_params is None:
            reset_params = self._config if self._config is not None else {}

        # Apply reset parameters
        for key, value in reset_params.items():
            self.reset_parameters.set_float_parameter(key, value)

        # Reset and verify the environment
        self._env.reset()
        info, terminal_info = self._env.get_steps(self._behavior_name)
        self._verify_environment(len(info))
        
        # Retrieve initial observations
        vis_obs, vec_obs, _, _ = self._process_agent_info(info, terminal_info)
        return vis_obs, vec_obs

    def step(self, action):
        """Runs one timestep of the environment"s dynamics.
        Once an episode is done, reset() has to be called manually.
                
        Arguments:
            action {List} -- A list of at least one discrete action to be executed by the agent

        Returns:
            {numpy.ndarray} -- Visual observation
            {numpy.ndarray} -- Vector observation
            {float} -- (Total) Scalar reward signaled by the environment
            {bool} -- Whether the episode of the environment terminated
            {dict} -- Further episode information (e.g. cumulated reward) retrieved from the environment once an episode completed
        """
        # Carry out the agent's action
        self._env.set_actions(self._behavior_name, action.reshape([1, -1]))
        self._env.step()
        info, terminal_info = self._env.get_steps(self._behavior_name)

        # Process step results
        vis_obs, vec_obs, reward, done = self._process_agent_info(info, terminal_info)
        self._rewards.append(reward)

        # Episode information
        if done:
            info = {"reward": sum(self._rewards),
                    "length": len(self._rewards)}
        else:
            info = None

        return vis_obs, vec_obs, reward, done, info

    def close(self):
        """Shut down the environment."""
        self._env.close()

    def _process_agent_info(self, info, terminal_info):
        """Extracts the observations, rewards, dones, and episode infos.

        Args:
            info {DecisionSteps}: Current state
            terminal_info {TerminalSteps}: Terminal state

        Returns:
            vis_obs {ndarray} -- Visual observation if available, else None
            vec_obs {ndarray} -- Vector observation if available, else None
            reward {float} -- Reward signal from the environment
            done {bool} -- Whether the episode terminated or not
        """
        # Determine if the episode terminated or not
        if len(terminal_info) == 0:
            done = False
            use_info = info
        else:
            done = True
            use_info = terminal_info

        # Process visual observations
        if self.visual_observation_space is not None:
            vis_obs = use_info.obs[self._vis_obs_index][0]
        else:
            vis_obs = None

        # Process vector observations
        if self.vector_observation_space is not None:
            for i, dim in enumerate(self._vec_obs_indices):
                if i == 0:
                    vec_obs = use_info.obs[dim][0]
                else:
                    vec_obs = np.concatenate((vec_obs, use_info.obs[dim][0]))
        else:
            vec_obs = None

        return vis_obs, vec_obs, use_info.reward[0], done

    def _verify_environment(self, num_agents = None):
        """Checks if the environment meets the requirements of this wrapper.
        Only one agent and at maximum one visual observation is allowed.
        Only Discrete and MultiDiscrete action spaces are supported.

        Arguments:
            num_agents {int} -- Number of agents (default: {None})
        """
        # Verify number of agent types
        if len(self._env.behavior_specs) != 1:
            raise UnityEnvironmentException("The unity environment containts more than one agent type.")
        # Verify action space type
        if int(self._behavior_spec.action_type.value) == 1:
            raise UnityEnvironmentException("Continuous action spaces are not supported. Only discrete and MultiDiscrete spaces are supported.")
        # Verify number of visual observations
        if self._num_vis_obs > 1:
            raise UnityEnvironmentException("The unity environment contains more than one visual observation.")
        # Verify agent count
        if num_agents is not None and num_agents > 1:
            raise UnityEnvironmentException("The unity environment contains more than one agent.")
Example #25
if __name__ == "__main__":
    # Parse the command-line parameters at program start
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=False, action='store_true', help='Enable CUDA')
    parser.add_argument("-n", "--name", required=True, help="Name of the run")
    args = parser.parse_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    save_path = os.path.join("saves", "d4pg-" + args.name)
    os.makedirs(save_path, exist_ok=True)

    # Wrap the Unity environment in a Gym environment
    channel = EngineConfigurationChannel()
    unity_env = UnityEnvironment(ENV_ID, seed=1, side_channels=[channel])
    channel.set_configuration_parameters(time_scale=20.0)
    env = UnityToGymWrapper(unity_env)

    # Build the model following the D4PG architecture
    act_net = model.D4PGActor(env.observation_space.shape[0], env.action_space.shape[0]).to(device)
    crt_net = model.D4PGCritic(env.observation_space.shape[0], env.action_space.shape[0], N_ATOMS, Vmin, Vmax).to(device)
    print(act_net)
    print(crt_net)
    tgt_act_net = ptan.agent.TargetNet(act_net)
    tgt_crt_net = ptan.agent.TargetNet(crt_net)

    # Create the agent and replay buffer using the PTAN library
    writer = SummaryWriter(comment="-d4pg_" + args.name)
    agent = model.AgentD4PG(act_net, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, gamma=GAMMA, steps_count=REWARD_STEPS)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source, buffer_size=REPLAY_SIZE)
Example #26
class UnityWrapperProcess:
    def __init__(self,
                 conn: multiprocessing.connection.Connection = None,
                 train_mode=True,
                 file_name=None,
                 worker_id=0,
                 base_port=5005,
                 no_graphics=True,
                 seed=None,
                 scene=None,
                 additional_args=None,
                 n_agents=1):
        """
        Args:
            train_mode: If in train mode, Unity will run at an increased speed
            file_name: The executable path. The UnityEnvironment will run in the editor if None
            worker_id: Offset from base_port
            base_port: The port used to communicate with Unity. It is set to 5004 automatically when running in the editor.
            no_graphics: Whether Unity runs in no-graphics mode. It must be set to False if Unity uses a camera sensor.
            seed: Random seed
            scene: The scene name
            n_agents: The number of agents
        """
        self.scene = scene
        self.n_agents = n_agents

        seed = seed if seed is not None else np.random.randint(0, 65536)
        additional_args = [] if additional_args is None else additional_args.split(
            ' ')

        self.engine_configuration_channel = EngineConfigurationChannel()
        self.environment_parameters_channel = EnvironmentParametersChannel()

        self.environment_parameters_channel.set_float_parameter(
            'env_copys', float(n_agents))

        if conn:
            try:
                from algorithm import config_helper
                config_helper.set_logger()
            except:
                pass

            self._logger = logging.getLogger(
                f'UnityWrapper.Process_{os.getpid()}')
        else:
            self._logger = logging.getLogger('UnityWrapper.Process')

        self._env = UnityEnvironment(
            file_name=file_name,
            worker_id=worker_id,
            base_port=base_port if file_name else None,
            no_graphics=no_graphics and train_mode,
            seed=seed,
            additional_args=['--scene', scene] + additional_args,
            side_channels=[
                self.engine_configuration_channel,
                self.environment_parameters_channel
            ])

        self.engine_configuration_channel.set_configuration_parameters(
            width=200 if train_mode else 1280,
            height=200 if train_mode else 720,
            quality_level=5,
            time_scale=20 if train_mode else 1)

        self._env.reset()
        self.bahavior_name = list(self._env.behavior_specs)[0]

        if conn:
            try:
                while True:
                    cmd, data = conn.recv()
                    if cmd == INIT:
                        conn.send(self.init())
                    elif cmd == RESET:
                        conn.send(self.reset(data))
                    elif cmd == STEP:
                        conn.send(self.step(*data))
                    elif cmd == CLOSE:
                        self.close()
            except:
                self._logger.error(traceback.format_exc())

    def init(self):
        """
        Returns:
            observation shapes: tuple[(o1, ), (o2, ), (o3_1, o3_2, o3_3), ...]
            discrete action size: int, sum of all action branches
            continuous action size: int
        """
        behavior_spec = self._env.behavior_specs[self.behavior_name]
        obs_names = [o.name for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation names: {obs_names}')
        obs_shapes = [o.shape for o in behavior_spec.observation_specs]
        self._logger.info(f'Observation shapes: {obs_shapes}')

        self._empty_action = behavior_spec.action_spec.empty_action

        discrete_action_size = 0
        if behavior_spec.action_spec.discrete_size > 0:
            discrete_action_size = 1
            action_product_list = []
            for action, branch_size in enumerate(
                    behavior_spec.action_spec.discrete_branches):
                discrete_action_size *= branch_size
                action_product_list.append(range(branch_size))
                self._logger.info(
                    f"Discrete action branch {action} has {branch_size} different actions"
                )

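            # Enumerate the Cartesian product of all branch ranges so that a single
            # flat discrete index can later be mapped back to one action per branch.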
            self.action_product = np.array(
                list(itertools.product(*action_product_list)))

        continuous_action_size = behavior_spec.action_spec.continuous_size

        self._logger.info(f'Continuous action size: {continuous_action_size}')

        self.d_action_size = discrete_action_size
        self.c_action_size = continuous_action_size

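        # If any observation is image-like (rank >= 3), raise the render quality so
        # that camera sensors produce usable frames.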
        for o in behavior_spec.observation_specs:
            if len(o.shape) >= 3:
                self.engine_configuration_channel.set_configuration_parameters(
                    quality_level=5)
                break

        return obs_shapes, discrete_action_size, continuous_action_size

    def reset(self, reset_config=None):
        """
        return:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
        """
        reset_config = {} if reset_config is None else reset_config
        for k, v in reset_config.items():
            self.environment_parameters_channel.set_float_parameter(
                k, float(v))

        self._env.reset()
        decision_steps, terminal_steps = self._env.get_steps(
            self.behavior_name)

        return [obs.astype(np.float32) for obs in decision_steps.obs]

    def step(self, d_action, c_action):
        """
        Args:
            d_action: (NAgents, discrete_action_size), one-hot encoded action
            c_action: (NAgents, continuous_action_size)

        Returns:
            observations: list[(NAgents, o1), (NAgents, o2), (NAgents, o3_1, o3_2, o3_3)]
            rewards: (NAgents, )
            done: (NAgents, ), bool
            max_step: (NAgents, ), bool
        """

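        # d_action arrives one-hot over the flattened discrete action space; convert it
        # back to an index and then to the per-branch action via action_product.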
        if self.d_action_size:
            d_action = np.argmax(d_action, axis=1)
            d_action = self.action_product[d_action]

        self._env.set_actions(
            self.behavior_name,
            ActionTuple(continuous=c_action, discrete=d_action))
        self._env.step()

        decision_steps, terminal_steps = self._env.get_steps(
            self.behavior_name)

        tmp_terminal_steps = terminal_steps

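        # If all agents terminated this step there are no decision steps yet; keep
        # stepping with empty actions until new ones appear, accumulating the terminal
        # results observed along the way.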
        while len(decision_steps) == 0:
            self._env.set_actions(self.behavior_name, self._empty_action(0))
            self._env.step()
            decision_steps, terminal_steps = self._env.get_steps(
                self.behavior_name)
            tmp_terminal_steps.agent_id = np.concatenate(
                [tmp_terminal_steps.agent_id, terminal_steps.agent_id])
            tmp_terminal_steps.reward = np.concatenate(
                [tmp_terminal_steps.reward, terminal_steps.reward])
            tmp_terminal_steps.interrupted = np.concatenate(
                [tmp_terminal_steps.interrupted, terminal_steps.interrupted])

        reward = decision_steps.reward
        reward[tmp_terminal_steps.agent_id] = tmp_terminal_steps.reward

        done = np.full([len(decision_steps)], False, dtype=bool)
        done[tmp_terminal_steps.agent_id] = True

        max_step = np.full([len(decision_steps)], False, dtype=bool)
        max_step[tmp_terminal_steps.agent_id] = tmp_terminal_steps.interrupted

        return ([obs.astype(np.float32) for obs in decision_steps.obs],
                reward.astype(np.float32), done, max_step)

    def close(self):
        self._env.close()
        self._logger.warning(f'Process {os.getpid()} exits')
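
# Usage sketch (not part of the original example): how the wrapper above might be
# driven from a parent process over a Pipe. INIT/RESET/STEP/CLOSE are the command
# constants referenced in __init__ and are assumed to be defined elsewhere in this
# module; the executable path and scene name below are placeholders.
import multiprocessing as mp

import numpy as np


def _run_child(conn):
    UnityWrapperProcess(conn=conn, file_name='./UnityEnv', scene='MainScene')


if __name__ == '__main__':
    parent_conn, child_conn = mp.Pipe()
    mp.Process(target=_run_child, args=(child_conn,), daemon=True).start()

    parent_conn.send((INIT, None))
    obs_shapes, d_action_size, c_action_size = parent_conn.recv()

    parent_conn.send((RESET, None))
    obs_list = parent_conn.recv()

    # One step with zero actions for a single agent copy.
    d_action = np.zeros((1, d_action_size), dtype=np.float32) if d_action_size else None
    c_action = np.zeros((1, c_action_size), dtype=np.float32) if c_action_size else None
    parent_conn.send((STEP, (d_action, c_action)))
    obs_list, rewards, done, max_step = parent_conn.recv()

    parent_conn.send((CLOSE, None))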
Example #27
import os
import random
import time
from collections import deque

import numpy as np
import torch.optim as optim
from mlagents_envs.environment import UnityEnvironment
from mlagents_envs.side_channel.engine_configuration_channel import EngineConfigurationChannel

your_name = 'Veronica'

np.set_printoptions(precision=3)

channel = EngineConfigurationChannel()
env = UnityEnvironment(file_name='./env/football', side_channels=[channel])
channel.set_configuration_parameters(time_scale=0.25)
env.reset()

load_tensor = False

two_striker = False
num_episodes = 500
num_game_each_episode = 1000
epsilon = 1.0
epsilon_min = 0.05
epsilon_decay = 0.99
scores = [[], []]
best_avg_score = 0
scores_average_window = 5
striker_solved_score = 1
defender_solved_score = -1
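
# Sketch of how the schedule variables above are typically used (an assumption, not
# part of the original snippet): epsilon decays multiplicatively each episode, and the
# running average over the last `scores_average_window` episodes gates early stopping.
for episode in range(1, num_episodes + 1):
    # ... collect one episode and append its return to scores[0] / scores[1] ...
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    avg_striker_score = np.mean(scores[0][-scores_average_window:]) if scores[0] else 0.0
    if avg_striker_score >= striker_solved_score:
        break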
Example #28
l1_size = 16  # number of neurons in 1st layer
l2_size = 32  # number of neurons in 2nd layer
POP_SIZE = 12  # population size

if __name__ == "__main__":
    try:
        # This is a non-blocking call that only loads the environment.
        print(
            "Script started. Please start the Unity environment to begin training."
        )
        engine_channel = EngineConfigurationChannel()
        # env = UnityEnvironment( side_channels=[engine_channel])
        env = default_registry["3DBall"].make(side_channels=[engine_channel])
        engine_channel.set_configuration_parameters(
            time_scale=1, width=1920,
            height=1080)  # time_scale controls simulation speed: 0.5 = half speed, 10.0 = 10x speed
        # Start interacting with the environment.
        env.reset()
        # Info about our environment ---------------------
        print(f"number of behaviours: {len(list(env.behavior_specs) )}")
        behavior_name = list(env.behavior_specs)[0]
        spec = env.behavior_specs[behavior_name]
        action_spec = spec.action_spec
        decision_steps, terminal_steps = env.get_steps(behavior_name)
        # Examine the number of observations per Agent
        print("Number of observations : ", len(spec.observation_shapes))
        print(" observations : ", spec.observation_shapes)
        # Is the Action continuous or multi-discrete ?
        if action_spec.is_continuous():
            print("The action is continuous")
Example #29
    def run_unity(self):
        from mlagents_envs.environment import UnityEnvironment
        from mlagents_envs.side_channel.engine_configuration_channel import EngineConfig, EngineConfigurationChannel
        if self.random_risk:
            path = 'runs/dqpg_td3_' + env_set[
                'env_name'] + '_' + "{:1.1f}".format(
                    self.risk_factor) + "_noisy"
        else:
            path = 'runs/dqpg_td3_' + env_set[
                'env_name'] + '_' + "{:1.1f}".format(self.risk_factor)
        writer = SummaryWriter(path)
        epsilon = 1
        self.ep = 0
        self.dynamic_risk = self.risk_factor

        engine_configuration_channel = EngineConfigurationChannel()
        env = UnityEnvironment(file_name="env/" + env_set['env_name'],
                               no_graphics=True,
                               side_channels=[engine_configuration_channel])
        env.reset()
        group_name = list(env.behavior_specs.keys())[0]
        group_spec = env.behavior_specs[group_name]
        engine_configuration_channel.set_configuration_parameters(
            time_scale=12.0)
        dec, term = env.get_steps(group_name)

        step = 0
        scores = np.zeros([self.worker_size])
        score = deque(maxlen=10)
        end_ep = env_set['ep_len']
        train_start_ep = 5
        vs = []
        ps = []

        #for i in range(1000*end_ep)
        while True:
            self.ep += 1
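            # Linearly anneal epsilon over roughly the first half of training; once it
            # drops below 0.05 it is no longer updated.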
            if epsilon > 0.05:
                epsilon = -self.ep * 2.0 / end_ep + 1.0
            for i in range(1000):
                step += 1

                if self.ep > train_start_ep:
                    actions = self.get_action(dec.obs[0], epsilon,
                                              len(dec.agent_id))
                else:
                    actions = 2.0 * np.random.rand(len(dec.agent_id),
                                                   self.output_size) - 1.0

                env.set_actions(group_name, actions)
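                # Cache the most recent non-empty decision step and the actions sent
                # for it, so transitions can be assembled after env.step().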
                if len(dec.agent_id) > 0:
                    old_dec = dec
                    old_action = actions
                    action_idx = old_dec.agent_id
                env.step()
                dec, term = env.get_steps(group_name)
                for idx in term.agent_id:
                    state = old_dec[idx].obs[0]
                    next_state = term[idx].obs[0]
                    reward = term[idx].reward
                    done = not term[idx].max_step
                    act = old_action[idx]
                    self.memory.append(state, next_state, reward, done, act)
                    scores[idx] += reward
                    score.append(scores[idx])
                    scores[idx] = 0
                for idx in dec.agent_id:
                    if idx in term.agent_id:
                        continue
                    state = old_dec[idx].obs[0]
                    next_state = dec[idx].obs[0]
                    reward = dec[idx].reward
                    done = False
                    act = old_action[idx]
                    self.memory.append(state, next_state, reward, done, act)
                    scores[idx] += reward

                if self.ep > train_start_ep:
                    if step % 2 == 0:
                        v, p = self.update()
                        vs.append(v)
                        ps.append(p)
                    else:
                        v = self.value_update()
                        vs.append(v)

            print('episode :', self.ep, '| score : ',
                  "{0:.2f}".format(np.mean(score)),
                  "[{0:.1f}]".format(np.std(score)), '| epsilon :',
                  "{0:.2f}".format(epsilon), " | v :",
                  "{0:.2f}".format(np.mean(vs)), " | p :",
                  "{0:.2f}".format(np.mean(ps)))
            if self.ep < end_ep:
                writer.add_scalar('data/reward', np.mean(score), self.ep)
                writer.add_scalar('data/reward_std', np.std(score), self.ep)
                writer.add_scalar('data/epsilon', epsilon, self.ep)
                writer.add_scalar('data/memory_size', len(self.memory.memory),
                                  self.ep)
                writer.add_scalar('loss/value', np.mean(vs), self.ep)
                writer.add_scalar('loss/policy', np.mean(ps), self.ep)
                writer.add_scalar('data/risk_factor', self.dynamic_risk,
                                  self.ep)
                vs.clear()
                ps.clear()
            else:
                break
Example #30
def test_run_environment(env_name):
    """
    Run the low-level API test using the specified environment
    :param env_name: Name of the Unity environment binary to launch
    """
    engine_configuration_channel = EngineConfigurationChannel()
    env = UnityEnvironment(
        file_name=env_name,
        side_channels=[engine_configuration_channel],
        no_graphics=True,
        additional_args=["-logFile", "-"],
    )

    try:
        # Reset the environment
        env.reset()

        # Set the default brain to work with
        group_name = list(env.behavior_specs.keys())[0]
        group_spec = env.behavior_specs[group_name]

        # Set the time scale of the engine
        engine_configuration_channel.set_configuration_parameters(
            time_scale=3.0)

        # Get the state of the agents
        decision_steps, terminal_steps = env.get_steps(group_name)

        # Examine the number of observations per Agent
        print("Number of observations : ", len(group_spec.sensor_specs))

        # Is there a visual observation ?
        vis_obs = any(
            len(sen_spec.shape) == 3 for sen_spec in group_spec.sensor_specs)
        print("Is there a visual observation ?", vis_obs)

        # Examine the state space for the first observation for the first agent
        print("First Agent observation looks like: \n{}".format(
            decision_steps.obs[0][0]))

        for _episode in range(10):
            env.reset()
            decision_steps, terminal_steps = env.get_steps(group_name)
            done = False
            episode_rewards = 0
            tracked_agent = -1
            while not done:
                action_tuple = group_spec.action_spec.random_action(
                    len(decision_steps))
                if tracked_agent == -1 and len(decision_steps) >= 1:
                    tracked_agent = decision_steps.agent_id[0]
                env.set_actions(group_name, action_tuple)
                env.step()
                decision_steps, terminal_steps = env.get_steps(group_name)
                done = False
                if tracked_agent in decision_steps:
                    episode_rewards += decision_steps[tracked_agent].reward
                if tracked_agent in terminal_steps:
                    episode_rewards += terminal_steps[tracked_agent].reward
                    done = True
            print(f"Total reward this episode: {episode_rewards}")
    finally:
        env.close()
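
# A possible entry point for the test above (an assumption, not part of the original):
# pass the path to a Unity environment binary, or nothing to connect to the editor.
if __name__ == "__main__":
    import sys
    test_run_environment(sys.argv[1] if len(sys.argv) > 1 else None)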