def __init__(self, num_channels, output_dim, initializer='xavier'):
    """Create the shared base and the two linear heads of the Q-network.

    Args:
        num_channels: Forwarded to ``create_dqn_base`` as its first
            argument.
        output_dim: Output size of ``A_stream``; ``V_stream`` always has
            a single output.
        initializer: Initializer name forwarded to every builder call.
    """
    super(DiscreteConvQNetwork, self).__init__()

    # Input size shared by both heads; presumably the flattened output of
    # the base (7x7 maps with 64 channels) -- confirm against
    # create_dqn_base.
    flat_features = 7 * 7 * 64
    head_width = 512

    self.base = create_dqn_base(num_channels, initializer=initializer)

    # NOTE(review): V_stream / A_stream look like the value and advantage
    # streams of a dueling architecture -- confirm against forward().
    self.V_stream = create_linear_network(
        flat_features, 1,
        hidden_units=[head_width], initializer=initializer)
    self.A_stream = create_linear_network(
        flat_features, output_dim,
        hidden_units=[head_width], initializer=initializer)
def __init__(self, num_inputs, num_actions, hidden_units=None,
             initializer='xavier'):
    """Build the linear network backing the policy.

    Args:
        num_inputs: Input size passed to ``create_linear_network``.
        num_actions: The network is built with ``num_actions * 2``
            outputs (presumably a mean and a log-std per action --
            confirm against the forward pass).
        hidden_units: Forwarded to ``create_linear_network`` as
            ``hidden_units``. ``None`` (the default) means
            ``[256, 256]``.
        initializer: Initializer name forwarded to the builder.
    """
    super(GaussianPolicy, self).__init__()

    # Build the default per call: a list literal in the signature is a
    # single object shared by every instance, so any mutation downstream
    # would leak into later constructions.
    if hidden_units is None:
        hidden_units = [256, 256]

    # https://github.com/ku2482/rltorch/blob/master/rltorch/network/builder.py
    self.policy = create_linear_network(
        num_inputs, num_actions * 2,
        hidden_units=hidden_units, initializer=initializer)
def __init__(self, num_channels, output_dim, initializer='kaiming'):
    """Assemble the policy as a single ``nn.Sequential``.

    The modules returned by ``create_dqn_base`` and by
    ``create_linear_network`` are unpacked, in that order, into one
    ``nn.Sequential`` stored as ``self.policy``.

    Args:
        num_channels: Forwarded to ``create_dqn_base`` as its first
            argument.
        output_dim: Output size of the linear head, which is built with
            ``output_activation='softmax'``.
        initializer: Initializer name forwarded to both builders.
    """
    super(ConvCategoricalPolicy, self).__init__()

    # Forward ``initializer`` to the base as well, so the requested scheme
    # is not silently replaced there by create_dqn_base's own default.
    base_layers = create_dqn_base(num_channels, initializer=initializer)

    # 7 * 7 * 64 is presumably the flattened output size of the base --
    # confirm against create_dqn_base.
    head_layers = create_linear_network(
        7 * 7 * 64, output_dim, hidden_units=[512],
        output_activation='softmax', initializer=initializer)

    self.policy = nn.Sequential(*base_layers, *head_layers)
def __init__(self, input_dim, output_dim, hidden_units=None,
             initializer='xavier'):
    """Build the linear network backing the policy.

    Args:
        input_dim: Input size passed to ``create_linear_network``.
        output_dim: The network is built with ``output_dim * 2`` outputs
            (presumably a mean and a log-std per dimension -- confirm
            against the forward pass).
        hidden_units: Forwarded to ``create_linear_network`` as
            ``hidden_units``. ``None`` (the default) means an empty
            list.
        initializer: Initializer name forwarded to the builder.
    """
    super(LinearGaussianPolicy, self).__init__()

    # Build the default per call: a list literal in the signature is a
    # single object shared by every instance.
    if hidden_units is None:
        hidden_units = []

    self.policy = create_linear_network(
        input_dim, output_dim * 2,
        hidden_units=hidden_units, initializer=initializer)
def __init__(self, input_dim, output_dim, hidden_units=None,
             initializer='xavier'):
    """Build the single-output linear Q-network.

    Args:
        input_dim: Added to ``output_dim`` to form the network's input
            size.
        output_dim: Added to ``input_dim`` to form the network's input
            size (presumably the network consumes a state concatenated
            with an action -- confirm against the forward pass).
        hidden_units: Forwarded to ``create_linear_network`` as
            ``hidden_units``. ``None`` (the default) means an empty
            list.
        initializer: Initializer name forwarded to the builder.
    """
    super(ContinuousLinearQNetwork, self).__init__()

    # Build the default per call: a list literal in the signature is a
    # single object shared by every instance.
    if hidden_units is None:
        hidden_units = []

    self.Q = create_linear_network(
        input_dim + output_dim, 1,
        hidden_units=hidden_units, initializer=initializer)