def __init__(self, num_inputs, dist_mem, args):
    super().__init__()
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0), gain=0.01)
    hidden_dim = args.dist_hidden_dim
    self.args = args
    action_dim = args.o_dim if args.use_option_embs else args.z_dim
    self.dist_mem = dist_mem
    # Embed the option/latent action before scoring it against the state.
    if args.dist_linear_action:
        self.action_linear = init_(nn.Linear(action_dim, hidden_dim))
    else:
        self.action_linear = nn.Sequential(
            init_(nn.Linear(action_dim, hidden_dim)), nn.ReLU(),
            init_(nn.Linear(hidden_dim, hidden_dim)))
    # Final scorer maps (action embedding, state) to a single logit.
    if args.dist_non_linear_final:
        self.linear = nn.Sequential(
            init_(nn.Linear(hidden_dim + num_inputs, hidden_dim)), nn.ReLU(),
            init_(nn.Linear(hidden_dim, 1)))
    else:
        self.linear = init_(nn.Linear(hidden_dim + num_inputs, 1))
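# These snippets all rely on an `init` helper that is not shown in this
# section. A minimal sketch of what it presumably does (apply a weight
# initializer with a gain, plus a bias initializer, then return the module)
# is given below; the exact signature in the original repo may differ.
import numpy as np
import torch
import torch.nn as nn


def init(module, weight_init, bias_init, gain=1):
    # Initialize the module's weight (orthogonal, scaled by `gain` in these
    # snippets) and its bias (constant zero here), then hand the module back
    # so the call can be used inline around nn.Linear.
    weight_init(module.weight.data, gain=gain)
    bias_init(module.bias.data)
    return module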
def __init__(self, num_inputs, num_outputs, args):
    super().__init__()
    self.args = args
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0), gain=0.01)
    self.linear = init_(nn.Linear(num_inputs, num_outputs))
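# A minimal sketch of how a Categorical head like this is typically used:
# the linear layer produces logits that parameterize a categorical action
# distribution. The `forward` below is an assumption, not the repo's exact
# method.
def forward(self, x):
    logits = self.linear(x)
    return torch.distributions.Categorical(logits=logits)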
def __init__(self, num_inputs, num_outputs, softplus=False, use_double=False,
             use_mean_entropy=False):
    super().__init__()
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))
    self.use_double = use_double
    self.use_mean_entropy = use_mean_entropy
    self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
    # State-independent log-std, learned as a bias term.
    self.logstd = AddBias(torch.zeros(num_outputs))
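# `AddBias` is referenced but not defined in this section. A plausible
# minimal implementation (a trainable bias added to its input, used here as
# a state-independent log-std) and a hedged sketch of the corresponding
# forward pass follow; both are assumptions about the surrounding code.
class AddBias(nn.Module):
    def __init__(self, bias):
        super().__init__()
        # One trainable value per action dimension.
        self._bias = nn.Parameter(bias.unsqueeze(1))

    def forward(self, x):
        # Broadcast the bias over the batch dimension.
        return x + self._bias.t().view(1, -1)


def forward(self, x):
    mean = self.fc_mean(x)
    # Adding the learned bias to zeros yields the log-std itself.
    logstd = self.logstd(torch.zeros_like(mean))
    return torch.distributions.Normal(mean, logstd.exp())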
def __init__(self, num_inputs, num_outputs, softplus=False, use_double=False,
             use_mean_entropy=False):
    super().__init__()
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))
    self.softplus = softplus
    self.use_double = use_double
    self.fc_mean = init_(nn.Linear(num_inputs, num_outputs))
    self.use_mean_entropy = use_mean_entropy
    if not softplus:
        # State-dependent log-std head.
        self.fc_logstd = init_(nn.Linear(num_inputs, num_outputs))
    else:
        # Predict the variance directly; softplus keeps it positive.
        self.softplus_fn = nn.Softplus(threshold=10)
        self.fc_var = init_(nn.Linear(num_inputs, num_outputs))
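# Hedged sketch of a forward pass for this variant: unlike the AddBias
# version above, the log-std (or the variance, under the softplus option)
# is predicted from the input. Layer names match the constructor; the
# method itself is an assumption.
def forward(self, x):
    mean = self.fc_mean(x)
    if self.softplus:
        # Softplus keeps the predicted variance strictly positive.
        std = self.softplus_fn(self.fc_var(x)).sqrt()
    else:
        std = self.fc_logstd(x).exp()
    return torch.distributions.Normal(mean, std)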
def __init__(self, num_inputs, num_outputs, softplus=False, scale=2.0,
             mid=0.0, use_double=False):
    super().__init__()
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))
    self.scale = scale
    self.mid = mid
    self.softplus = softplus
    self.use_double = use_double
    if softplus:
        self.softplus_fn = nn.Softplus(threshold=10)
    # Alpha and beta should always be positive
    self.fc_alpha = init_(nn.Linear(num_inputs, num_outputs))
    self.fc_beta = init_(nn.Linear(num_inputs, num_outputs))
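# Hedged sketch of how this Beta head plausibly produces actions, assuming
# softplus=True: raw outputs are made positive with softplus (the +1 keeps
# the density unimodal, a common trick for Beta policies), and `scale`/`mid`
# map the Beta's [0, 1] support onto the action range. Method names and the
# rescaling convention are assumptions, not the repo's confirmed behavior.
def forward(self, x):
    alpha = self.softplus_fn(self.fc_alpha(x)) + 1.0
    beta = self.softplus_fn(self.fc_beta(x)) + 1.0
    return torch.distributions.Beta(alpha, beta)


def to_action(self, sample):
    # Rescale a Beta sample from [0, 1] to [mid - scale/2, mid + scale/2];
    # with the defaults (scale=2.0, mid=0.0) this is the usual [-1, 1] range.
    return (sample - 0.5) * self.scale + self.mid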
def init_(m):
    return init(m, nn.init.orthogonal_,
                lambda x: nn.init.constant_(x, 0), np.sqrt(2))
def init_(m):
    return init(m, nn.init.orthogonal_,
                lambda x: nn.init.constant_(x, 0),
                nn.init.calculate_gain('relu'))
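# These two wrappers compute the same gain: nn.init.calculate_gain('relu')
# returns sqrt(2), matching np.sqrt(2) above. A small illustrative
# (hypothetical) trunk showing the typical inline usage:
trunk = nn.Sequential(
    init_(nn.Linear(64, 64)), nn.ReLU(),
    init_(nn.Linear(64, 64)), nn.ReLU())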
def __init__(self, state_size, cont_output_size, dist_mem, args,
             use_double=False):
    super().__init__()
    self.args = args
    self.use_double = use_double
    self.cont_entropy_coef = args.cont_entropy_coef

    # Discrete head: scores an option/latent embedding against the state.
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0), gain=0.01)
    hidden_dim = args.dist_hidden_dim
    action_dim = args.o_dim if args.use_option_embs else args.z_dim
    self.dist_mem = dist_mem
    if args.dist_linear_action:
        self.action_linear = init_(nn.Linear(action_dim, hidden_dim))
    else:
        self.action_linear = nn.Sequential(
            init_(nn.Linear(action_dim, hidden_dim)), nn.ReLU(),
            init_(nn.Linear(hidden_dim, hidden_dim)))
    if args.dist_non_linear_final:
        self.linear = nn.Sequential(
            init_(nn.Linear(hidden_dim + state_size, hidden_dim)), nn.ReLU(),
            init_(nn.Linear(hidden_dim, 1)))
    else:
        self.linear = init_(nn.Linear(hidden_dim + state_size, 1))

    # Continuous head: Beta or diagonal-Gaussian parameters conditioned on
    # the concatenated (action embedding, state) features.
    init_ = lambda m: init(m, nn.init.orthogonal_,
                           lambda x: nn.init.constant_(x, 0))
    if args.use_beta:
        if args.softplus:
            self.softplus_fn = nn.Softplus(threshold=10)
        if args.conditioned_non_linear:
            self.fc_alpha = nn.Sequential(
                init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
            self.fc_beta = nn.Sequential(
                init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
        else:
            self.fc_alpha = init_(
                nn.Linear(hidden_dim + state_size, cont_output_size))
            self.fc_beta = init_(
                nn.Linear(hidden_dim + state_size, cont_output_size))
    else:
        if args.conditioned_non_linear:
            self.fc_mean = nn.Sequential(
                init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
        else:
            self.fc_mean = init_(
                nn.Linear(hidden_dim + state_size, cont_output_size))
        # self.logstd = nn.Parameter(torch.randn(n_cont))
        if not args.softplus:
            if args.conditioned_non_linear:
                self.fc_logstd = nn.Sequential(
                    init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                    nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
            else:
                self.fc_logstd = init_(
                    nn.Linear(hidden_dim + state_size, cont_output_size))
        else:
            self.softplus_fn = nn.Softplus(threshold=10)
            if args.conditioned_non_linear:
                self.fc_var = nn.Sequential(
                    init_(nn.Linear(hidden_dim + state_size, hidden_dim)),
                    nn.ReLU(), init_(nn.Linear(hidden_dim, cont_output_size)))
            else:
                self.fc_var = init_(
                    nn.Linear(hidden_dim + state_size, cont_output_size))
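# Hedged sketch of the discrete scoring path this combined head appears to
# implement: a candidate option/latent embedding is projected by
# `action_linear`, concatenated with the state features, and reduced to a
# scalar logit by `linear`. The method name and tensor layout are
# assumptions for illustration only.
def score_option(self, state, option_emb):
    h = self.action_linear(option_emb)
    # One logit per (state, option) pair; logits across candidate options
    # would then parameterize the discrete distribution.
    return self.linear(torch.cat([h, state], dim=-1))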