Exemplo n.º 1
0
    def __init__(self,
                 architecture,
                 weight_init=gauss_weights_init(0, 0.02),
                 activation_functions=None):
        """Build separate policy ("fc*") and value ("fc_val*") linear stacks.

        Args:
            architecture: sequence of layer widths; needs at least two
                entries. The policy stack maps architecture[0] -> ... ->
                architecture[-1]; the value stack shares the hidden widths
                but ends in a single scalar output.
            weight_init: callable applied to every submodule via
                ``self.apply``.
            activation_functions: stored as-is; presumably consumed in
                ``forward`` (not visible here) — TODO confirm.

        Raises:
            Exception: if ``architecture`` has fewer than two entries.
        """
        super(ActorCriticPPO, self).__init__()
        if len(architecture) < 2:
            raise Exception(
                "Architecture needs at least two numbers to create network")
        self.activation_functions = activation_functions
        self.layer_list = []
        self.layer_list_val = []

        # Learnable log-std of the Gaussian policy; wrapping in nn.Parameter
        # registers it with the module so the optimizer sees it.
        self.siglog = nn.Parameter(tor.zeros(1, requires_grad=True))

        # Policy stack: fc0 .. fc{len(architecture) - 2}.
        for i in range(len(architecture) - 1):
            self.layer_list.append(
                nn.Linear(architecture[i], architecture[i + 1]))
            setattr(self, "fc" + str(i), self.layer_list[-1])

        # Value stack shares the hidden widths: fc_val0 .. fc_val{len - 3}.
        for i in range(len(architecture) - 2):
            self.layer_list_val.append(
                nn.Linear(architecture[i], architecture[i + 1]))
            setattr(self, "fc_val" + str(i), self.layer_list_val[-1])

        # Scalar value head. BUGFIX: the attribute index was
        # len(architecture) - 1, which left a gap in the fc_val* numbering
        # (fc_val{len-2} was skipped); use the next consecutive index.
        # NOTE(review): this renames the registered submodule, so state_dict
        # keys change for the value head — confirm no checkpoints rely on it.
        self.layer_list_val.append(nn.Linear(architecture[-2], 1))
        setattr(self, "fc_val" + str(len(architecture) - 2),
                self.layer_list_val[-1])

        self.apply(weight_init)
Exemplo n.º 2
0
    def __init__(self, architecture, weight_init=gauss_weights_init(0,0.02),
            activation_functions=None):
        """Create one nn.Linear per consecutive width pair in `architecture`.

        Layers are kept in ``self.layer_list`` and also registered on the
        module as ``fc0``, ``fc1``, ... so PyTorch tracks their parameters.
        ``weight_init`` is applied to every submodule afterwards.
        """
        super(NeuralNet, self).__init__()
        self.activation_functions = activation_functions
        self.layer_list = []
        widths_in_out = zip(architecture, architecture[1:])
        for idx, (n_in, n_out) in enumerate(widths_in_out):
            layer = nn.Linear(n_in, n_out)
            self.layer_list.append(layer)
            setattr(self, "fc" + str(idx), layer)

        self.apply(weight_init)
Exemplo n.º 3
0
    def __init__(self, architecture, weight_init=gauss_weights_init(0,0.02),activation_functions=None):
        """Fully-connected stack: nn.Linear between each pair of widths.

        Registers each layer as ``fc<i>`` and collects them in
        ``self.layer_list``; raises if fewer than two widths are supplied.
        """
        super(SimpleNetwork, self).__init__()
        if len(architecture) < 2:
            raise Exception("Architecture needs at least two numbers to create network")

        self.activation_functions = activation_functions
        self.layer_list = []
        num_layers = len(architecture) - 1
        for idx in range(num_layers):
            fc = nn.Linear(architecture[idx], architecture[idx + 1])
            setattr(self, "fc" + str(idx), fc)
            self.layer_list.append(fc)

        self.apply(weight_init)
Exemplo n.º 4
0
    def __init__(self,
                 architecture,
                 num_grus=2,
                 weight_init=gauss_weights_init(0, 0.02),
                 activation_functions=None):
        """Recurrent actor-critic: GRU front-ends plus policy/value stacks.

        First number of architecture indicates number of GRU units.

        Args:
            architecture: sequence of layer widths; ``architecture[0]`` is
                used as both input and hidden size of the GRUs. Needs at
                least two entries.
            num_grus: number of stacked layers in each ``nn.GRU``.
            weight_init: callable applied to every submodule via
                ``self.apply``.
            activation_functions: stored as-is; presumably consumed in
                ``forward`` (not visible here) — TODO confirm.

        Raises:
            Exception: if ``architecture`` has fewer than two entries.
        """
        super(RecurrentActorCriticPPO, self).__init__()
        if len(architecture) < 2:
            raise Exception(
                "Architecture needs at least two numbers to create network")
        self.activation_functions = activation_functions
        self.layer_list = []
        self.layer_list_val = []

        gru_size = architecture[0]

        # Separate recurrent front-ends for the value and policy paths.
        self.value_gru = nn.GRU(input_size=gru_size,
                                hidden_size=gru_size,
                                num_layers=num_grus,
                                batch_first=True)
        self.policy_gru = nn.GRU(input_size=gru_size,
                                 hidden_size=gru_size,
                                 num_layers=num_grus,
                                 batch_first=True)

        # Learnable log-std of the Gaussian policy; wrapping in nn.Parameter
        # registers it with the module so the optimizer sees it.
        self.siglog = nn.Parameter(tor.zeros(1, requires_grad=True))

        # Policy stack: fc0 .. fc{len(architecture) - 2}.
        for i in range(len(architecture) - 1):
            self.layer_list.append(
                nn.Linear(architecture[i], architecture[i + 1]))
            setattr(self, "fc" + str(i), self.layer_list[-1])

        # Value stack shares the hidden widths: fc_val0 .. fc_val{len - 3}.
        for i in range(len(architecture) - 2):
            self.layer_list_val.append(
                nn.Linear(architecture[i], architecture[i + 1]))
            setattr(self, "fc_val" + str(i), self.layer_list_val[-1])

        # Scalar value head. BUGFIX: the attribute index was
        # len(architecture) - 1, which left a gap in the fc_val* numbering
        # (fc_val{len-2} was skipped); use the next consecutive index.
        # NOTE(review): this renames the registered submodule, so state_dict
        # keys change for the value head — confirm no checkpoints rely on it.
        self.layer_list_val.append(nn.Linear(architecture[-2], 1))
        setattr(self, "fc_val" + str(len(architecture) - 2),
                self.layer_list_val[-1])

        self.apply(weight_init)
Exemplo n.º 5
0
    def __init__(self,
                 architecture,
                 weight_init=gauss_weights_init(0, 0.02),
                 activation_functions=None):
        """PPO policy network whose log-sigma starts at its lower bound.

        Builds one nn.Linear per consecutive width pair (registered as
        ``fc<i>``), initialises ``self.sigma_log`` from the class-level
        ``sigma_log_bounds``, and applies ``weight_init`` to all submodules.
        """
        super(RandSigmaPPONetwork, self).__init__()
        if len(architecture) < 2:
            raise Exception(
                "Architecture needs at least two numbers to create network")
        self.activation_functions = activation_functions
        self.layer_list = []
        # Start log-sigma at the lower bound of the declared range.
        self.sigma_log = self.sigma_log_bounds[0]
        pairs = zip(architecture[:-1], architecture[1:])
        for idx, (n_in, n_out) in enumerate(pairs):
            fc = nn.Linear(n_in, n_out)
            self.layer_list.append(fc)
            setattr(self, "fc" + str(idx), fc)

        self.apply(weight_init)
Exemplo n.º 6
0
    def __init__(self,
                 architecture,
                 recurrent_layers=2,
                 weight_init=gauss_weights_init(0, 0.02),
                 activation_functions=None,
                 bidirectional=True,
                 recurr_type=nn.GRU):
        """Recurrent network: one recurrent unit followed by linear layers.

        First number of architecture indicates number of GRU units.

        The recurrent unit uses ``architecture[0]`` as both input and hidden
        size. The linear stack then maps each consecutive width pair
        (registered as ``fc<i>``). NOTE(review): with ``bidirectional=True``
        the recurrent output width is ``2 * architecture[0]`` while ``fc0``
        expects ``architecture[0]`` — how ``forward`` reconciles this is not
        visible here; confirm against the forward pass.
        """
        super(RecurrentNet, self).__init__()
        if len(architecture) < 2:
            raise Exception(
                "Architecture needs at least two numbers to create network")
        self.activation_functions = activation_functions
        self.layer_list = []
        self.layer_list_val = []
        self.recurrent = True

        recurr_size = architecture[0]
        self.recurrent_unit = recurr_type(input_size=recurr_size,
                                          hidden_size=recurr_size,
                                          num_layers=recurrent_layers,
                                          batch_first=True,
                                          bidirectional=bidirectional)

        pairs = zip(architecture[:-1], architecture[1:])
        for idx, (n_in, n_out) in enumerate(pairs):
            fc = nn.Linear(n_in, n_out)
            self.layer_list.append(fc)
            setattr(self, "fc" + str(idx), fc)

        # Last hidden state of the recurrent unit; populated during use.
        self.h_n = None

        self.apply(weight_init)
    Implementation of the hindsight policy gradient paper: https://arxiv.org/pdf/1711.06006.pdf.

"""

# Training parameters
num_bits = 8            # size of the bit vector the agent must match
num_episodes = 10000
episode_length = 16
# Rolling window over recent episodes (presumably a moving-reward tracker,
# given the name "mvr" — TODO confirm where it is appended to).
mvr_tracker = deque(maxlen=400)

env = BitFlippingEnv(num_bits)

# Policy input is state and goal concatenated (2 * num_bits); output size is
# num_bits + 1 (presumably one action per bit plus one extra — TODO confirm
# against PolicyAHG / the action-selection code below).
policy = PolicyAHG(num_bits * 2, num_bits + 1)

# Initialization of weights
policy.apply(gauss_weights_init(0, 0.02))
policy.zero_grad()
optimizer = Adam(policy.parameters(), lr=0.001)

# Keeps track of the current episode
episode_steps = [0] * episode_length
for i in range(num_episodes):

    # The reward accumulated in the episode
    acc_reward = 0
    acc_distance = 0
    actions = []
    goal_occurances = {}
    goal_occurances[tuple(env.goal)] = 1
    state_and_goal = np.zeros((1, num_bits * 2))