# Example 1 (label: 0)
    def __init__(self, base_name, config):
        """Build the discrete-action A2C agent.

        Constructs the policy/value model from the network factory, the Adam
        optimizer, optional input normalization, an optional central value
        network, and the PPO dataset.

        Args:
            base_name: experiment/agent name forwarded to the A2C base class.
            config: agent configuration dict (also stored by the base class
                as ``self.config``).
        """
        a2c_common.DiscreteA2CBase.__init__(self, base_name, config)
        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)

        # Use a distinct name for the network-build arguments instead of
        # rebinding (shadowing) the `config` parameter as the original did.
        build_config = {
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            'num_seqs': self.num_actors * self.num_agents,
            'value_size': self.env_info.get('value_size', 1)
        }

        self.model = self.network.build(build_config)
        self.model.to(self.ppo_device)

        self.init_rnn_from_model(self.model)

        # Normalize once; the original converted to float a second time when
        # passing the learning rate to the optimizer.
        self.last_lr = float(self.last_lr)
        self.optimizer = optim.Adam(self.model.parameters(),
                                    self.last_lr,
                                    eps=1e-08,
                                    weight_decay=self.weight_decay)

        if self.normalize_input:
            self.running_mean_std = RunningMeanStd(obs_shape).to(
                self.ppo_device)

        if self.has_central_value:
            # NOTE(review): 'writter' spelling is preserved — cv_config is
            # expanded as **kwargs, so the key must match CentralValueTrain's
            # parameter name; confirm before "fixing" it.
            cv_config = {
                'state_shape': torch_ext.shape_whc_to_cwh(self.state_shape),
                'value_size': self.value_size,
                'ppo_device': self.ppo_device,
                'num_agents': self.num_agents,
                'num_steps': self.steps_num,
                'num_actors': self.num_actors,
                'num_actions': self.actions_num,
                'seq_len': self.seq_len,
                'model': self.central_value_config['network'],
                'config': self.central_value_config,
                'writter': self.writer,
                'multi_gpu': self.multi_gpu
            }
            self.central_value_net = central_value.CentralValueTrain(
                **cv_config).to(self.ppo_device)

        self.use_experimental_cv = self.config.get('use_experimental_cv',
                                                   False)
        self.dataset = datasets.PPODataset(self.batch_size,
                                           self.minibatch_size,
                                           self.is_discrete, self.is_rnn,
                                           self.ppo_device, self.seq_len)
        self.algo_observer.after_init(self)
# Example 2 (label: 0)
    def __init__(self, config):
        """Build the discrete-action player: an eval-mode copy of the model.

        Args:
            config: player configuration dict; must contain 'network' and
                'normalize_input'.
        """
        BasePlayer.__init__(self, config)

        self.network = config['network']
        action_space = self.action_space
        if type(action_space) is gym.spaces.Discrete:
            # Single discrete head.
            self.actions_num = action_space.n
            self.is_multi_discrete = False
        elif type(action_space) is gym.spaces.Tuple:
            # One head per sub-space (multi-discrete).
            self.actions_num = [space.n for space in action_space]
            self.is_multi_discrete = True
        self.mask = [False]

        self.normalize_input = self.config['normalize_input']

        # Networks are channel-first; convert the observation shape.
        net_shape = torch_ext.shape_whc_to_cwh(self.state_shape)
        build_config = {
            'actions_num': self.actions_num,
            'input_shape': net_shape,
            'num_seqs': self.num_agents,
            'value_size': self.value_size
        }

        self.model = self.network.build(build_config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        if self.normalize_input:
            self.running_mean_std = RunningMeanStd(net_shape).to(self.device)
            self.running_mean_std.eval()
# Example 3 (label: 0)
    def __init__(self, config):
        """Build the SAC player (continuous actions, no input normalization).

        Args:
            config: player configuration dict; must contain 'network'.
        """
        BasePlayer.__init__(self, config)
        self.network = config['network']
        self.actions_num = self.action_space.shape[0]
        env_action_space = self.env_info['action_space']
        # Scalar [low, high] bounds — presumably used to rescale sampled
        # actions; confirm against the caller.
        self.action_range = [
            float(env_action_space.low.min()),
            float(env_action_space.high.max()),
        ]

        self.normalize_input = False
        net_shape = torch_ext.shape_whc_to_cwh(self.state_shape)
        build_config = {
            'obs_dim': self.env_info["observation_space"].shape[0],
            'action_dim': self.env_info["action_space"].shape[0],
            'actions_num': self.actions_num,
            'input_shape': net_shape,
        }
        self.model = self.network.build(build_config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
# Example 4 (label: 0)
    def __init__(self, config):
        """Build the continuous-action player: eval-mode model + action bounds.

        Args:
            config: player configuration dict; must contain 'network' and
                'normalize_input'.
        """
        BasePlayer.__init__(self, config)
        self.network = config['network']
        space = self.action_space
        self.actions_num = space.shape[0]
        # Keep per-dimension bounds as on-device tensors — presumably for
        # clipping/rescaling actions; confirm against the caller.
        self.actions_low = torch.from_numpy(
            space.low.copy()).float().to(self.device)
        self.actions_high = torch.from_numpy(
            space.high.copy()).float().to(self.device)
        self.mask = [False]

        self.normalize_input = self.config['normalize_input']
        # Networks are channel-first; convert the observation shape.
        net_shape = torch_ext.shape_whc_to_cwh(self.state_shape)
        build_config = {
            'actions_num': self.actions_num,
            'input_shape': net_shape,
            'num_seqs': self.num_agents,
        }
        self.model = self.network.build(build_config)
        self.model.to(self.device)
        self.model.eval()
        self.is_rnn = self.model.is_rnn()
        if self.normalize_input:
            self.running_mean_std = RunningMeanStd(net_shape).to(self.device)
            self.running_mean_std.eval()
# Example 5 (label: 0)
        def __init__(self, params, **kwargs):
            """Assemble the Impala-CNN actor-critic network.

            Layout: impala conv stack -> (optional RNN) -> MLP, with a value
            head plus a discrete logits head and/or continuous mu/sigma heads,
            chosen by the loaded params.

            Args:
                params: network section of the config; consumed by
                    ``self.load``, which sets the ``self.*`` hyper-parameter
                    attributes read below (units, rnn flags, space config...).
                **kwargs: build-time sizes — 'actions_num', 'input_shape',
                    and optional 'num_seqs' / 'value_size' (default 1).
            """
            actions_num = kwargs.pop('actions_num')
            input_shape = kwargs.pop('input_shape')
            # Convert to channel-first, as the conv stack expects.
            input_shape = torch_ext.shape_whc_to_cwh(input_shape)
            self.num_seqs = num_seqs = kwargs.pop('num_seqs', 1)
            self.value_size = kwargs.pop('value_size', 1)

            NetworkBuilder.BaseNetwork.__init__(self, **kwargs)
            self.load(params)

            self.cnn = self._build_impala(input_shape, self.conv_depths)
            mlp_input_shape = self._calc_input_size(input_shape, self.cnn)

            in_mlp_shape = mlp_input_shape

            # With no MLP units, the heads read the flattened CNN features
            # directly.
            if len(self.units) == 0:
                out_size = mlp_input_shape
            else:
                out_size = self.units[-1]

            if self.has_rnn:
                # RNN placement: after the MLP (default) or between the CNN
                # and the MLP; the surrounding sizes are rewired accordingly.
                if not self.is_rnn_before_mlp:
                    rnn_in_size = out_size
                    out_size = self.rnn_units
                else:
                    rnn_in_size = in_mlp_shape
                    in_mlp_shape = self.rnn_units
                self.rnn = self._build_rnn(self.rnn_name, rnn_in_size,
                                           self.rnn_units, self.rnn_layers)
                #self.layer_norm = torch.nn.LayerNorm(self.rnn_units)

            mlp_args = {
                'input_size': in_mlp_shape,
                'units': self.units,
                'activation': self.activation,
                'norm_func_name': self.normalization,
                'dense_func': torch.nn.Linear
            }

            self.mlp = self._build_mlp(**mlp_args)

            self.value = torch.nn.Linear(out_size, self.value_size)
            self.value_act = self.activations_factory.create(
                self.value_activation)
            self.flatten_act = self.activations_factory.create(self.activation)
            if self.is_discrete:
                self.logits = torch.nn.Linear(out_size, actions_num)
            if self.is_continuous:
                self.mu = torch.nn.Linear(out_size, actions_num)
                self.mu_act = self.activations_factory.create(
                    self.space_config['mu_activation'])
                mu_init = self.init_factory.create(
                    **self.space_config['mu_init'])
                self.sigma_act = self.activations_factory.create(
                    self.space_config['sigma_activation'])
                sigma_init = self.init_factory.create(
                    **self.space_config['sigma_init'])

                # fixed_sigma: a single state-independent parameter vector
                # (presumably log-std — confirm in the forward pass);
                # otherwise sigma is predicted from the features.
                if self.space_config['fixed_sigma']:
                    self.sigma = nn.Parameter(torch.zeros(actions_num,
                                                          requires_grad=True,
                                                          dtype=torch.float32),
                                              requires_grad=True)
                else:
                    self.sigma = torch.nn.Linear(out_size, actions_num)

            mlp_init = self.init_factory.create(**self.initializer)

            # Conv layers use Kaiming init; MLP/head linear layers use the
            # configured initializer. Biases keep their module defaults.
            for m in self.modules():
                if isinstance(m, nn.Conv2d):
                    nn.init.kaiming_normal_(m.weight, mode='fan_out')
                    #nn.init.xavier_uniform_(m.weight, gain=nn.init.calculate_gain('elu'))
            for m in self.mlp:
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)

            if self.is_discrete:
                mlp_init(self.logits.weight)
            if self.is_continuous:
                mu_init(self.mu.weight)
                if self.space_config['fixed_sigma']:
                    sigma_init(self.sigma)
                else:
                    sigma_init(self.sigma.weight)

            mlp_init(self.value.weight)
# Example 6 (label: 0)
        def __init__(self, params, **kwargs):
            """Assemble the general A2C network.

            Layout: optional CNN -> (optional RNN) -> MLP, optionally with a
            fully separate critic tower (``self.separate``); heads cover the
            discrete, multi-discrete, and continuous action spaces.

            Args:
                params: network section of the config; consumed by
                    ``self.load``, which sets the ``self.*`` hyper-parameter
                    attributes read below.
                **kwargs: build-time sizes — 'actions_num', 'input_shape',
                    optional 'value_size'/'num_seqs' (default 1), and
                    'num_agents' when joint obs-actions are enabled.
            """
            actions_num = kwargs.pop('actions_num')
            input_shape = kwargs.pop('input_shape')
            self.value_size = kwargs.pop('value_size', 1)
            self.num_seqs = num_seqs = kwargs.pop('num_seqs', 1)
            # NOTE(review): unlike the impala builder, remaining kwargs are
            # NOT forwarded to BaseNetwork.__init__ — confirm this is
            # intentional.
            NetworkBuilder.BaseNetwork.__init__(self)
            self.load(params)
            # Empty Sequentials act as identity placeholders when a stage is
            # not configured.
            self.actor_cnn = nn.Sequential()
            self.critic_cnn = nn.Sequential()
            self.actor_mlp = nn.Sequential()
            self.critic_mlp = nn.Sequential()

            if self.has_cnn:
                # Conv stacks expect channel-first input.
                input_shape = torch_ext.shape_whc_to_cwh(input_shape)
                cnn_args = {
                    'ctype': self.cnn['type'],
                    'input_shape': input_shape,
                    'convs': self.cnn['convs'],
                    'activation': self.cnn['activation'],
                    'norm_func_name': self.normalization,
                }
                self.actor_cnn = self._build_conv(**cnn_args)

                if self.separate:
                    self.critic_cnn = self._build_conv(**cnn_args)

            mlp_input_shape = self._calc_input_size(input_shape,
                                                    self.actor_cnn)

            if self.use_joint_obs_actions:
                # Encode other agents' discrete actions and concatenate the
                # embedding with the observation features.
                use_embedding = self.joint_obs_actions_config['embedding']
                emb_size = self.joint_obs_actions_config['embedding_scale']
                num_agents = kwargs.pop('num_agents')
                mlp_out = mlp_input_shape // self.joint_obs_actions_config[
                    'mlp_scale']
                self.joint_actions = torch_ext.DiscreteActionsEncoder(
                    actions_num, mlp_out, emb_size, num_agents, use_embedding)
                mlp_input_shape = mlp_input_shape + mlp_out

            in_mlp_shape = mlp_input_shape
            # With no MLP units, the heads read the features directly.
            if len(self.units) == 0:
                out_size = mlp_input_shape
            else:
                out_size = self.units[-1]

            if self.has_rnn:
                # RNN placement: after the MLP (default) or before it; sizes
                # around the RNN are rewired accordingly.
                if not self.is_rnn_before_mlp:
                    rnn_in_size = out_size
                    out_size = self.rnn_units
                    if self.rnn_concat_input:
                        # RNN also sees the raw features concatenated in.
                        rnn_in_size += in_mlp_shape
                else:
                    rnn_in_size = in_mlp_shape
                    in_mlp_shape = self.rnn_units

                if self.separate:
                    # Separate recurrent cores for actor and critic.
                    self.a_rnn = self._build_rnn(self.rnn_name, rnn_in_size,
                                                 self.rnn_units,
                                                 self.rnn_layers)
                    self.c_rnn = self._build_rnn(self.rnn_name, rnn_in_size,
                                                 self.rnn_units,
                                                 self.rnn_layers)
                    if self.rnn_ln:
                        self.a_layer_norm = torch.nn.LayerNorm(self.rnn_units)
                        self.c_layer_norm = torch.nn.LayerNorm(self.rnn_units)
                else:
                    self.rnn = self._build_rnn(self.rnn_name, rnn_in_size,
                                               self.rnn_units, self.rnn_layers)
                    if self.rnn_ln:
                        self.layer_norm = torch.nn.LayerNorm(self.rnn_units)

            mlp_args = {
                'input_size': in_mlp_shape,
                'units': self.units,
                'activation': self.activation,
                'norm_func_name': self.normalization,
                'dense_func': torch.nn.Linear,
                'd2rl': self.is_d2rl,
                'norm_only_first_layer': self.norm_only_first_layer
            }
            self.actor_mlp = self._build_mlp(**mlp_args)
            if self.separate:
                self.critic_mlp = self._build_mlp(**mlp_args)

            self.value = torch.nn.Linear(out_size, self.value_size)
            self.value_act = self.activations_factory.create(
                self.value_activation)

            if self.is_discrete:
                self.logits = torch.nn.Linear(out_size, actions_num)
            '''
                for multidiscrete actions num is a tuple
            '''
            if self.is_multi_discrete:
                self.logits = torch.nn.ModuleList(
                    [torch.nn.Linear(out_size, num) for num in actions_num])
            if self.is_continuous:
                self.mu = torch.nn.Linear(out_size, actions_num)
                self.mu_act = self.activations_factory.create(
                    self.space_config['mu_activation'])
                mu_init = self.init_factory.create(
                    **self.space_config['mu_init'])
                self.sigma_act = self.activations_factory.create(
                    self.space_config['sigma_activation'])
                sigma_init = self.init_factory.create(
                    **self.space_config['sigma_init'])

                # fixed_sigma: a single state-independent parameter vector
                # (presumably log-std — confirm in the forward pass);
                # otherwise sigma is predicted from the features.
                if self.space_config['fixed_sigma']:
                    self.sigma = nn.Parameter(torch.zeros(actions_num,
                                                          requires_grad=True,
                                                          dtype=torch.float32),
                                              requires_grad=True)
                else:
                    self.sigma = torch.nn.Linear(out_size, actions_num)

            mlp_init = self.init_factory.create(**self.initializer)
            if self.has_cnn:
                cnn_init = self.init_factory.create(**self.cnn['initializer'])

            # Initialize all conv/linear weights; biases are zeroed when
            # present. Head-specific inits below overwrite these for mu/sigma.
            for m in self.modules():
                if isinstance(m, nn.Conv2d) or isinstance(m, nn.Conv1d):
                    cnn_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)
                if isinstance(m, nn.Linear):
                    mlp_init(m.weight)
                    if getattr(m, "bias", None) is not None:
                        torch.nn.init.zeros_(m.bias)

            if self.is_continuous:
                mu_init(self.mu.weight)
                if self.space_config['fixed_sigma']:
                    sigma_init(self.sigma)
                else:
                    sigma_init(self.sigma.weight)
# Example 7 (label: 0)
    def __init__(self, base_name, params):
        """Build the SAC agent: model, three optimizers, replay buffer.

        Args:
            base_name: experiment/agent name forwarded to ``base_init``.
            params: full parameter dict; its 'config' section drives
                everything below and is stored as ``self.config``.
        """
        self.config = config = params['config']
        print(config)
        # TODO: Get obs shape and self.network
        self.load_networks(params)
        self.base_init(base_name, config)
        self.num_seed_steps = config["num_seed_steps"]
        self.gamma = config["gamma"]
        self.critic_tau = config["critic_tau"]
        self.batch_size = config["batch_size"]
        self.init_alpha = config["init_alpha"]
        self.learnable_temperature = config["learnable_temperature"]
        self.replay_buffer_size = config["replay_buffer_size"]
        self.num_steps_per_episode = config.get("num_steps_per_episode", 1)
        self.normalize_input = config.get("normalize_input", False)

        self.max_env_steps = config.get(
            "max_env_steps",
            1000)  # temporary, in future we will use other approach

        print(self.batch_size, self.num_actors, self.num_agents)

        self.num_frames_per_epoch = self.num_actors * self.num_steps_per_episode

        # Learnable log of the entropy temperature alpha.
        self.log_alpha = torch.tensor(np.log(self.init_alpha)).float().to(
            self.sac_device)
        self.log_alpha.requires_grad = True
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # Scalar [low, high] action bounds.
        self.action_range = [
            float(self.env_info['action_space'].low.min()),
            float(self.env_info['action_space'].high.max())
        ]

        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        net_config = {
            'obs_dim': self.env_info["observation_space"].shape[0],
            'action_dim': self.env_info["action_space"].shape[0],
            'actions_num': self.actions_num,
            'input_shape': obs_shape,
            # BUG FIX: the original read `self.nnormalize_input`, an
            # attribute that is never assigned (only `self.normalize_input`
            # is, above) — that raised AttributeError at build time.
            'normalize_input': self.normalize_input
        }
        self.model = self.network.build(net_config)
        self.model.to(self.sac_device)

        print("Number of Agents", self.num_actors, "Batch Size",
              self.batch_size)

        self.actor_optimizer = torch.optim.Adam(
            self.model.sac_network.actor.parameters(),
            lr=self.config['actor_lr'],
            betas=self.config.get("actor_betas", [0.9, 0.999]))

        self.critic_optimizer = torch.optim.Adam(
            self.model.sac_network.critic.parameters(),
            lr=self.config["critic_lr"],
            betas=self.config.get("critic_betas", [0.9, 0.999]))

        self.log_alpha_optimizer = torch.optim.Adam([self.log_alpha],
                                                    lr=self.config["alpha_lr"],
                                                    betas=self.config.get(
                                                        "alphas_betas",
                                                        [0.9, 0.999]))

        self.replay_buffer = experience.VectorizedReplayBuffer(
            self.env_info['observation_space'].shape,
            self.env_info['action_space'].shape, self.replay_buffer_size,
            self.sac_device)
        # Target entropy is scaled by a coefficient times -|action dim|.
        self.target_entropy_coef = config.get("target_entropy_coef", 0.5)
        self.target_entropy = self.target_entropy_coef * -self.env_info[
            'action_space'].shape[0]
        print("Target entropy", self.target_entropy)
        self.step = 0
        self.algo_observer = config['features']['observer']

        # TODO: Is there a better way to get the maximum number of episodes?
        self.max_episodes = torch.ones(
            self.num_actors,
            device=self.sac_device) * self.num_steps_per_episode
# Example 8 (label: 0)
    def __init__(self, base_name, config):
        """Build the SAC agent (older variant): model, optimizers, buffer.

        Args:
            base_name: experiment/agent name forwarded to ``base_init``.
            config: agent configuration dict (``base_init`` presumably
                stores it as ``self.config``, which is read below — confirm).
        """
        print(config)
        # TODO: Get obs shape and self.network
        self.base_init(base_name, config)
        self.num_seed_steps = config["num_seed_steps"]
        self.discount = config["discount"]
        self.critic_tau = config["critic_tau"]
        self.actor_update_frequency = config["actor_update_frequency"]
        self.critic_target_update_frequency = config[
            "critic_target_update_frequency"]
        self.batch_size = config["batch_size"]
        self.init_temperature = config["init_temperature"]
        self.learnable_temperature = config["learnable_temperature"]
        self.replay_buffer_size = config["replay_buffer_size"]
        self.num_steps_per_episode = config.get("num_steps_per_episode", 500)

        print(self.batch_size, self.num_actors, self.num_agents)

        self.num_frames_per_epoch = self.num_actors * self.num_steps_per_episode

        # Learnable log of the entropy temperature alpha.
        self.log_alpha = torch.tensor(np.log(self.init_temperature)).to(
            self.sac_device)
        self.log_alpha.requires_grad = True
        action_space = self.env_info['action_space']
        self.actions_num = action_space.shape[0]

        # Scalar [low, high] action bounds.
        self.action_range = [
            float(self.env_info['action_space'].low.min()),
            float(self.env_info['action_space'].high.max())
        ]

        obs_shape = torch_ext.shape_whc_to_cwh(self.obs_shape)
        # Use a distinct name for the network-build arguments instead of
        # rebinding (shadowing) the `config` parameter as the original did.
        net_config = {
            'obs_dim': self.env_info["observation_space"].shape[0],
            'action_dim': self.env_info["action_space"].shape[0],
            'actions_num': self.actions_num,
            'input_shape': obs_shape
        }
        # NOTE(review): self.network is not assigned in this method —
        # presumably set by base_init; verify.
        self.model = self.network.build(net_config)
        self.model.to(self.sac_device)
        print("Number of Agents", self.num_actors, "Batch Size",
              self.batch_size)

        self.actor_optimizer = torch.optim.Adam(
            self.model.sac_network.actor.parameters(),
            lr=self.config['actor_lr'],
            betas=self.config["actor_betas"])

        self.critic_optimizer = torch.optim.Adam(
            self.model.sac_network.critic.parameters(),
            lr=self.config["critic_lr"],
            betas=self.config["critic_betas"])

        self.log_alpha_optimizer = torch.optim.Adam(
            [self.log_alpha],
            lr=self.config["alpha_lr"],
            betas=self.config["alpha_betas"])

        self.replay_buffer = experience.VectorizedReplayBuffer(
            self.env_info['observation_space'].shape,
            self.env_info['action_space'].shape, self.replay_buffer_size,
            self.sac_device)
        # Standard SAC target entropy: -|action dim|.
        self.target_entropy = -self.env_info['action_space'].shape[0]
        # self.algo_observer.after_init(self)
        # TODO: Algo_Observer?
        self.step = 0

        # TODO: Is there a better way to get the maximum number of episodes?
        self.max_episodes = np.ones(self.num_actors) * 1000
        self.episode_lengths = np.zeros(self.num_actors, dtype=int)