Example #1
0
    def __init__(self, num_inputs, dist_mem, args):
        """Build a scalar scoring head over a state/action-embedding pair.

        Args:
            num_inputs: size of the state feature vector fed to the
                final layer alongside the action embedding.
            dist_mem: distribution-memory module, stored for later use.
            args: experiment config; reads dist_hidden_dim,
                use_option_embs, o_dim, z_dim, dist_linear_action and
                dist_non_linear_final.
        """
        super().__init__()

        # Orthogonal weight init with zero bias and a small 0.01 gain.
        def make_layer(module):
            return init(module,
                        nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0),
                        gain=0.01)

        hidden_dim = args.dist_hidden_dim
        self.args = args
        if args.use_option_embs:
            action_dim = args.o_dim
        else:
            action_dim = args.z_dim
        self.dist_mem = dist_mem

        # Action embedding: one linear layer, or a two-layer MLP.
        if not args.dist_linear_action:
            self.action_linear = nn.Sequential(
                make_layer(nn.Linear(action_dim, hidden_dim)), nn.ReLU(),
                make_layer(nn.Linear(hidden_dim, hidden_dim)))
        else:
            self.action_linear = make_layer(nn.Linear(action_dim, hidden_dim))

        # Final scalar head over [action embedding ; state features].
        if not args.dist_non_linear_final:
            self.linear = make_layer(nn.Linear(hidden_dim + num_inputs, 1))
        else:
            self.linear = nn.Sequential(
                make_layer(nn.Linear(hidden_dim + num_inputs, hidden_dim)),
                nn.ReLU(), make_layer(nn.Linear(hidden_dim, 1)))
Example #2
0
    def __init__(self, num_inputs, num_outputs, args):
        """Single linear head producing categorical-distribution logits."""
        super(Categorical, self).__init__()

        self.args = args

        # Orthogonal init, zero bias, small 0.01 gain on the logit layer.
        def make_layer(module):
            return init(module,
                        nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0),
                        gain=0.01)

        self.linear = make_layer(nn.Linear(num_inputs, num_outputs))
Example #3
0
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 softplus=False,
                 use_double=False,
                 use_mean_entropy=False):
        """Gaussian head with a learned, state-independent log-std.

        NOTE(review): `softplus` is accepted but never used in this
        block — confirm whether it should be stored/handled as in the
        sibling Gaussian head.
        """
        super(DiagGaussian, self).__init__()

        # Orthogonal init with zero bias (default gain).
        def make_layer(module):
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.use_double = use_double
        self.use_mean_entropy = use_mean_entropy
        self.fc_mean = make_layer(nn.Linear(num_inputs, num_outputs))

        # State-independent log-std, realized as a learned additive bias.
        self.logstd = AddBias(torch.zeros(num_outputs))
Example #4
0
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 softplus=False,
                 use_double=False,
                 use_mean_entropy=False):
        """Gaussian head with a state-dependent spread.

        The spread is predicted either as a log-std (default) or, when
        `softplus` is set, as a variance later squashed via Softplus.
        """
        super().__init__()

        # Orthogonal init with zero bias (default gain).
        def make_layer(module):
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.softplus = softplus
        self.use_double = use_double
        self.fc_mean = make_layer(nn.Linear(num_inputs, num_outputs))
        self.use_mean_entropy = use_mean_entropy

        if softplus:
            self.softplus_fn = nn.Softplus(threshold=10)
            self.fc_var = make_layer(nn.Linear(num_inputs, num_outputs))
        else:
            self.fc_logstd = make_layer(nn.Linear(num_inputs, num_outputs))
Example #5
0
    def __init__(self,
                 num_inputs,
                 num_outputs,
                 softplus=False,
                 scale=2.0,
                 mid=0.0,
                 use_double=False):
        """Beta-distribution head predicting alpha/beta parameters.

        `scale` and `mid` are stored for use elsewhere (presumably to
        map the unit-interval Beta sample into an action range — TODO
        confirm against the forward pass).
        """
        super().__init__()

        # Orthogonal init with zero bias (default gain).
        def make_layer(module):
            return init(module, nn.init.orthogonal_,
                        lambda x: nn.init.constant_(x, 0))

        self.scale = scale
        self.mid = mid
        self.softplus = softplus
        self.use_double = use_double

        if softplus:
            self.softplus_fn = nn.Softplus(threshold=10)

        # Alpha and beta should always be positive
        self.fc_alpha = make_layer(nn.Linear(num_inputs, num_outputs))
        self.fc_beta = make_layer(nn.Linear(num_inputs, num_outputs))
Example #6
0
 def init_(m):
     """Orthogonal weight init with zero bias and sqrt(2) gain."""
     zero_bias = lambda x: nn.init.constant_(x, 0)
     return init(m, nn.init.orthogonal_, zero_bias, np.sqrt(2))
Example #7
0
 def init_(m):
     """Orthogonal weight init with zero bias and the ReLU gain."""
     relu_gain = nn.init.calculate_gain('relu')
     zero_bias = lambda x: nn.init.constant_(x, 0)
     return init(m, nn.init.orthogonal_, zero_bias, relu_gain)
Example #8
0
    def __init__(self,
                 state_size,
                 cont_output_size,
                 dist_mem,
                 args,
                 use_double=False):
        """Hybrid discrete + continuous action head.

        Builds (1) a discrete scoring branch over the concatenation of
        an action embedding and the state features, and (2) a continuous
        branch parameterizing either a Beta distribution (when
        ``args.use_beta``) or a Gaussian.

        Args:
            state_size: size of the state feature vector concatenated
                into every final layer.
            cont_output_size: dimensionality of the continuous action.
            dist_mem: distribution-memory module, stored for later use.
            args: experiment config; reads cont_entropy_coef,
                dist_hidden_dim, use_option_embs, o_dim, z_dim,
                dist_linear_action, dist_non_linear_final, use_beta,
                softplus and conditioned_non_linear.
            use_double: flag stored on the module (its semantics are not
                visible in this block).
        """
        super().__init__()
        self.args = args
        self.use_double = use_double
        self.cont_entropy_coef = args.cont_entropy_coef

        # Discrete
        # Orthogonal weights, zero bias, small 0.01 gain for the
        # discrete scoring branch.
        init_ = lambda m: init(m,
                               nn.init.orthogonal_,
                               lambda x: nn.init.constant_(x, 0),
                               gain=0.01)

        hidden_dim = args.dist_hidden_dim
        action_dim = args.o_dim if args.use_option_embs else args.z_dim
        self.dist_mem = dist_mem

        # Action embedding: a single linear layer or a two-layer MLP.
        if args.dist_linear_action:
            self.action_linear = init_(nn.Linear(action_dim, hidden_dim))
        else:
            self.action_linear = nn.Sequential(
                init_(nn.Linear(action_dim, hidden_dim)), nn.ReLU(),
                init_(nn.Linear(hidden_dim, hidden_dim)))

        # Scalar discrete score over [action embedding ; state features].
        if args.dist_non_linear_final:
            self.linear = nn.Sequential(
                init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                nn.ReLU(), init_(nn.Linear(hidden_dim, 1)))
        else:
            self.linear = init_(nn.Linear(hidden_dim + state_size, 1))

        # Continuous
        # Rebind init_ with the default gain for the continuous branch.
        init_ = lambda m: init(m, nn.init.orthogonal_, lambda x: nn.init.
                               constant_(x, 0))
        if args.use_beta:
            # Beta parameterization: alpha and beta heads, each either a
            # single linear layer or a two-layer MLP.
            if args.softplus:
                self.softplus_fn = nn.Softplus(threshold=10)
            if args.conditioned_non_linear:
                self.fc_alpha = nn.Sequential(
                    init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                    nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
                self.fc_beta = nn.Sequential(
                    init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                    nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
            else:
                self.fc_alpha = init_(
                    nn.Linear(hidden_dim + state_size, cont_output_size))
                self.fc_beta = init_(
                    nn.Linear(hidden_dim + state_size, cont_output_size))
        else:
            # Gaussian parameterization: mean head plus either a log-std
            # head (default) or a variance head squashed via Softplus.
            if args.conditioned_non_linear:
                self.fc_mean = nn.Sequential(
                    init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                    nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
            else:
                self.fc_mean = init_(
                    nn.Linear(hidden_dim + state_size, cont_output_size))
            # self.logstd = nn.Parameter(torch.randn(n_cont))
            if not args.softplus:
                if args.conditioned_non_linear:
                    self.fc_logstd = nn.Sequential(
                        init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                        nn.ReLU(),
                        init_(nn.Linear(hidden_dim, cont_output_size)))
                else:
                    self.fc_logstd = init_(
                        nn.Linear(hidden_dim + state_size, cont_output_size))
            else:
                self.softplus_fn = nn.Softplus(threshold=10)
                if args.conditioned_non_linear:
                    self.fc_var = nn.Sequential(
                        init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                        nn.ReLU(),
                        init_(nn.Linear(hidden_dim, cont_output_size)))
                else:
                    self.fc_var = init_(
                        nn.Linear(hidden_dim + state_size, cont_output_size))