예제 #1
0
    def __init__(self, num_inputs=1, num_outputs=6, S_LEN=8):
        """Build the conv -> LSTM -> dense trunk plus actor and critic heads.

        Args:
            num_inputs: Number of input channels of the first convolution.
            num_outputs: Number of outputs of the actor head.
            S_LEN: Sets the LSTM input size to ``128 * (S_LEN - 5)``;
                presumably the width of the input's last axis (confirm
                against the caller).
        """
        super(agentNET, self).__init__()

        # Kernel widths 3, 3 and 2 at stride 1 shorten the last axis by
        # 2, 2 and 1 respectively, which is where ``S_LEN - 2 - 2 - 1``
        # below comes from.
        self.conv1 = nn.Conv2d(num_inputs, 32, (6, 3), stride=1)
        self.conv2 = nn.Conv2d(32, 64, (1, 3), stride=1)
        self.conv3 = nn.Conv2d(64, 128, (1, 2), stride=1)

        conv_width = S_LEN - 2 - 2 - 1
        self.lstm = nn.LSTMCell(128 * conv_width, 96)
        self.fc1 = nn.Linear(96, 48)
        self.fc2 = nn.Linear(48, 24)

        self.critic_linear = nn.Linear(24, 1)
        self.actor_linear = nn.Linear(24, num_outputs)

        # Generic initialisation for every sub-module, then layer-specific
        # weight scales and zeroed biases for the dense layers.
        self.apply(weights_init)
        dense_scales = (
            (self.actor_linear, 0.01),
            (self.critic_linear, 1.0),
            (self.fc1, 1.0),
            (self.fc2, 1.0),
        )
        for layer, std in dense_scales:
            layer.weight.data = norm_col_init(layer.weight.data, std)
            layer.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        # Leave the module in training mode.
        self.train()
예제 #2
0
    def __init__(self, num_inputs, action_space):
        """Create the conv/max-pool layers, the LSTM cell and both heads.

        Args:
            num_inputs: Number of channels of the input image.
            action_space: Action space object; its ``n`` attribute is the
                number of actor outputs.
        """
        super(A3Clstm, self).__init__()

        # Four convolution / 2x2 max-pool pairs.
        self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
        self.maxp2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
        self.maxp3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.maxp4 = nn.MaxPool2d(2, 2)

        self.lstm = nn.LSTMCell(1024, 512)
        n_actions = action_space.n
        self.critic_linear = nn.Linear(512, 1)
        self.actor_linear = nn.Linear(512, n_actions)

        # Generic init, then scale the conv weights by the ReLU gain.
        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain('relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(relu_gain)

        # Small actor weights, unit-scale critic weights, zero biases.
        for head, std in ((self.actor_linear, 0.01),
                          (self.critic_linear, 1.0)):
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #3
0
    def __init__(self, num_inputs, action_space):
        """Create the sequential conv encoder, the LSTM cell and both heads.

        Args:
            num_inputs: Number of channels of the input image.
            action_space: Action space object; its ``n`` attribute is the
                number of actor outputs.
        """
        super(A3Clstm, self).__init__()

        # Four stages of convolution -> 2x2 max-pool -> ReLU.
        encoder_layers = [
            nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            nn.Conv2d(32, 32, 5, stride=1, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            nn.Conv2d(32, 64, 4, stride=1, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
            nn.Conv2d(64, 64, 3, stride=1, padding=1),
            nn.MaxPool2d(2, 2),
            nn.ReLU(),
        ]
        self.basic = nn.Sequential(*encoder_layers)

        self.lstm = nn.LSTMCell(1024, 512)
        n_actions = action_space.n
        self.critic_linear = nn.Linear(512, 1)
        self.actor_linear = nn.Linear(512, n_actions)

        # Generic init first, then head-specific weight scales.
        self.apply(weights_init)
        for head, std in ((self.actor_linear, 0.01),
                          (self.critic_linear, 1.0)):
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #4
0
    def __init__(self, num_inputs=1, num_outputs=5):
        """Build three convolutions, an LSTM cell, one dense layer and heads.

        Args:
            num_inputs: Number of input channels of the first convolution.
            num_outputs: Number of outputs of the actor head.
        """
        super(agentNET, self).__init__()

        self.conv1 = nn.Conv2d(num_inputs, 16, 4, stride=1, padding=0)
        self.conv2 = nn.Conv2d(16, 16, 3, stride=1, padding=0)
        self.conv3 = nn.Conv2d(16, 8, 3, stride=1, padding=0)

        # The LSTM input size (312) is fixed, so the flattened conv output
        # must have exactly that many features.
        self.lstm = nn.LSTMCell(312, 78)

        self.fc1 = nn.Linear(78, 20)

        self.critic_linear = nn.Linear(20, 1)
        self.actor_linear = nn.Linear(20, num_outputs)

        # Generic init, then per-layer weight scales with zeroed biases.
        self.apply(weights_init)
        dense_scales = (
            (self.actor_linear, 0.01),
            (self.critic_linear, 1.0),
            (self.fc1, 1.0),
        )
        for layer, std in dense_scales:
            layer.weight.data = norm_col_init(layer.weight.data, std)
            layer.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #5
0
    def __init__(self, embedding_length, classes=2):
        """Build the CNN branch, the LSTM cell and the dense stack.

        Args:
            embedding_length: Width of one embedding vector; used as the
                second kernel dimension of ``conv1`` and as the LSTM input
                size.
            classes: Not used by this constructor (the last layer always has
                a single output).
        """
        super(SA_NET, self).__init__()

        # conv1 spans the whole embedding width; the remaining convolutions
        # are one-dimensional.
        self.conv1 = nn.Conv2d(
            1, 256, (7, embedding_length), stride=1, padding=(3, 0))
        self.conv2 = nn.Conv1d(256, 64, 5, stride=1, padding=2)
        self.conv3 = nn.Conv1d(64, 256, 3, stride=1, padding=1)
        self.conv4 = nn.Conv1d(256, 16, 1, stride=1, padding=0)

        self.lstm = nn.LSTMCell(embedding_length, LSTM_Hidden_Size)

        # fc1 takes LSTM_Hidden_Size + CNN_Feature_Size input features.
        fused_size = LSTM_Hidden_Size + CNN_Feature_Size
        self.fc1 = nn.Linear(fused_size, 128)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 1)

        # Generic init, then unit-scale weights and zero biases for the
        # dense layers.
        self.apply(weights_init)
        for dense in (self.fc1, self.fc2, self.fc3):
            dense.weight.data = norm_col_init(dense.weight.data, 1.0)
            dense.bias.data.fill_(0)

        self.train()
예제 #6
0
    def __init__(self, num_inputs, action_space, terminal_prediction,
                 reward_prediction):
        """Build the 1-D conv encoder, LSTM cell, heads and optional aux heads.

        Args:
            num_inputs: Number of input channels of the first convolution.
            action_space: Action space object; ``shape[0]`` is the size of
                both actor outputs.
            terminal_prediction: When truthy, a single-output terminal
                prediction head is created; otherwise the attribute is None.
            reward_prediction: When truthy, a single-output reward prediction
                head is created; otherwise the attribute is None.
        """
        super(A3C_CONV, self).__init__()

        # One slope shared by the activations and the gain computation, so
        # the weight scaling matches the non-linearity that is actually used.
        negative_slope = 0.1

        self.conv1 = nn.Conv1d(num_inputs, 32, 3, stride=1, padding=1)
        self.lrelu1 = nn.LeakyReLU(negative_slope)
        self.conv2 = nn.Conv1d(32, 32, 3, stride=1, padding=1)
        self.lrelu2 = nn.LeakyReLU(negative_slope)
        self.conv3 = nn.Conv1d(32, 64, 2, stride=1, padding=1)
        self.lrelu3 = nn.LeakyReLU(negative_slope)
        self.conv4 = nn.Conv1d(64, 64, 1, stride=1)
        self.lrelu4 = nn.LeakyReLU(negative_slope)

        self.lstm = nn.LSTMCell(1600, 128)
        num_outputs = action_space.shape[0]
        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, num_outputs)
        self.actor_linear2 = nn.Linear(128, num_outputs)

        # Optional auxiliary heads, each producing a single prediction.
        self.terminal_aux_head = None
        if terminal_prediction:  # this comes with the arg parser
            self.terminal_aux_head = nn.Linear(128, 1)

        self.reward_aux_head = None
        if reward_prediction:
            self.reward_aux_head = nn.Linear(128, 1)

        self.apply(weights_init)

        # calculate_gain defaults to a slope of 0.01 for 'leaky_relu'; pass
        # the slope of the layers above explicitly.
        lrelu_gain = nn.init.calculate_gain('leaky_relu', negative_slope)
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(lrelu_gain)

        # Small weights for both actor outputs, unit scale for the critic.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, std in head_scales:
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        # Auxiliary heads (when present) get unit-scale weights.
        for aux_head in (self.terminal_aux_head, self.reward_aux_head):
            if aux_head is not None:
                aux_head.weight.data = norm_col_init(
                    aux_head.weight.data, 1.0)
                aux_head.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.train()
예제 #7
0
    def __init__(self, num_inputs, action_space, pre_rnet='None'):
        """Create two LSTM cells, the heads and the representation network.

        Args:
            num_inputs: Not used by this constructor.
            action_space: Action space object; its ``n`` attribute is the
                number of actor outputs.
            pre_rnet: The string ``'None'`` builds a fresh ``RepresentNet``;
                any other value is formatted into the path of a saved
                network that is loaded from ``pre_model/``.
        """
        super(A3Clstm, self).__init__()

        self.lstm_1 = nn.LSTMCell(1024, 512)
        self.lstm_2 = nn.LSTMCell(1024, 512)
        n_actions = action_space.n
        self.critic_linear = nn.Linear(512, 1)
        self.actor_linear = nn.Linear(512, n_actions)

        # Generic init runs before r_net exists, so the representation
        # network is never touched by it.
        self.apply(weights_init)
        for head, std in ((self.actor_linear, 0.01),
                          (self.critic_linear, 1.0)):
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        for cell in (self.lstm_1, self.lstm_2):
            cell.bias_ih.data.fill_(0)
            cell.bias_hh.data.fill_(0)

        if pre_rnet != 'None':
            # NOTE(review): torch.load unpickles the file; only load
            # checkpoints from a trusted source.
            self.r_net = torch.load("pre_model/r_net_{}.pkl".format(pre_rnet))
        else:
            self.r_net = RepresentNet()

        self.train()
예제 #8
0
    def __init__(self, num_inputs, action_space, terminal_prediction,
                 reward_prediction):
        """Create the conv encoder, LSTM cell, heads and optional aux heads.

        Args:
            num_inputs: Number of channels of the input image.
            action_space: Action space object; its ``n`` attribute is the
                number of actor outputs.
            terminal_prediction: When truthy, a single-output terminal
                prediction head is created; otherwise the attribute is None.
            reward_prediction: When truthy, a single-output reward prediction
                head is created; otherwise the attribute is None.
        """
        super(A3Clstm, self).__init__()

        # Four convolution / 2x2 max-pool pairs.
        self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
        self.maxp2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
        self.maxp3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.maxp4 = nn.MaxPool2d(2, 2)

        # Hidden size 128 (an earlier revision used 1024 x 512).
        self.lstm = nn.LSTMCell(1024, 128)

        n_actions = action_space.n

        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, n_actions)

        # Optional auxiliary heads, each producing a single prediction.
        self.terminal_aux_head = None
        if terminal_prediction:  # this comes with the arg parser
            self.terminal_aux_head = nn.Linear(128, 1)

        self.reward_aux_head = None
        if reward_prediction:
            self.reward_aux_head = nn.Linear(128, 1)

        # Generic init, then scale the conv weights by the ReLU gain.
        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain('relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(relu_gain)

        for head, std in ((self.actor_linear, 0.01),
                          (self.critic_linear, 1.0)):
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        # Auxiliary heads (when present) get unit-scale weights.
        for aux_head in (self.terminal_aux_head, self.reward_aux_head):
            if aux_head is not None:
                aux_head.weight.data = norm_col_init(
                    aux_head.weight.data, 1.0)
                aux_head.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #9
0
파일: OCPGLinear.py 프로젝트: Aks-Dmv/AROC
    def __init__(self, num_inputs, action_space, num_options, nnWidth):
        """Build a linear option-critic model whose heads read the raw input.

        Args:
            num_inputs: Number of input features fed to every head.
            action_space: Action space object; ``n`` is used when present,
                otherwise the length of ``sample()`` gives the number of
                policy outputs.
            num_options: Number of options; the critic and the option policy
                have one output per option, and one policy head plus one
                termination head is created per option.
            nnWidth: Not used by this constructor (no hidden layers are
                built in this linear variant).
        """
        super(OCPGModel, self).__init__()
        self.numbInputs = num_inputs
        self.module_list = nn.ModuleList()

        try:
            num_outputs = action_space.n
        except AttributeError:
            # No ``n`` attribute: use the length of a sampled action.
            num_outputs = len(action_space.sample())

        self.critic_linear = nn.Linear(num_inputs, num_options)
        self.module_list += [self.critic_linear]
        self.optionpolicy = nn.Linear(num_inputs, num_options)
        self.module_list += [self.optionpolicy]

        # Generic init must run BEFORE the head-specific init below:
        # weights_init is applied to every registered sub-module and would
        # otherwise overwrite the option-policy weights set by norm_col_init.
        self.apply(weights_init)

        self.optionpolicy.weight.data = norm_col_init(
            self.optionpolicy.weight.data, 0.01)
        self.optionpolicy.bias.data.fill_(0)

        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 1.0)
        self.critic_linear.bias.data.fill_(0)

        # Per-option heads are kept in plain dicts for lookup by option
        # index and added to module_list so their parameters are registered.
        # They are created after apply(), so only the init below touches them.
        self.policylayer = {}
        self.termlayer = {}
        for i in range(num_options):
            self.policylayer[i] = nn.Linear(num_inputs, num_outputs)
            self.module_list += [self.policylayer[i]]
            self.termlayer[i] = nn.Linear(num_inputs, 1)
            self.module_list += [self.termlayer[i]]
            self.policylayer[i].weight.data = norm_col_init(
                self.policylayer[i].weight.data, 0.01)
            self.policylayer[i].bias.data.fill_(0)
            self.termlayer[i].weight.data = norm_col_init(
                self.termlayer[i].weight.data, 0.01)
            self.termlayer[i].bias.data.fill_(0)

        self.train()
예제 #10
0
    def __init__(self, input_dim, action_dim):
        """Create the three dense layers of the policy network.

        Args:
            input_dim: Number of input features of the first layer.
            action_dim: Number of outputs of the last layer.
        """
        super(PolicyNet, self).__init__()

        # Each layer is created and its weights re-initialised before the
        # next one is built; biases keep the nn.Linear default.
        first = nn.Linear(input_dim, 512)
        first.weight.data = norm_col_init(first.weight.data, 1)
        self.fc1 = first

        second = nn.Linear(512, 256)
        second.weight.data = norm_col_init(second.weight.data, 1)
        self.fc2 = second

        third = nn.Linear(256, action_dim)
        third.weight.data = norm_col_init(third.weight.data, 1)
        self.fc3 = third
예제 #11
0
    def __init__(self, num_inputs, action_space):
        """Create the conv encoder, three LSTM cells and the two heads.

        Args:
            num_inputs: Number of channels of the input image.
            action_space: Action space object; its ``n`` attribute is the
                number of actor outputs.
        """
        super(A3Clstm, self).__init__()

        # Four convolution / 2x2 max-pool pairs.
        self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1, padding=2)
        self.maxp1 = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(32, 32, 5, stride=1, padding=1)
        self.maxp2 = nn.MaxPool2d(2, 2)
        self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
        self.maxp3 = nn.MaxPool2d(2, 2)
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.maxp4 = nn.MaxPool2d(2, 2)

        # Projects the 1024 (= 64 * 64 / 4) conv features down to 100.
        self.flatten = nn.Linear(1024, 100)

        self.lstm = nn.LSTMCell(100, 100)
        n_actions = action_space.n

        # Critic head (state -> value).
        self.critic_linear = nn.Linear(100, 1)

        # Actor head (state -> action probabilities).
        self.actor_linear = nn.Linear(100, n_actions)

        # LSTM for encoding state into language for the actor.
        self.lstm_enc = nn.LSTMCell(100, 100)

        # LSTM for decoding state.
        self.lstm_dec = nn.LSTMCell(100, 100)

        # Generic init, then scale the conv weights by the ReLU gain.
        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain('relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(relu_gain)

        for head, std in ((self.actor_linear, 0.01),
                          (self.critic_linear, 1.0)):
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        # Zero both bias vectors of every LSTM cell.
        for cell in (self.lstm, self.lstm_enc, self.lstm_dec):
            cell.bias_ih.data.fill_(0)
            cell.bias_hh.data.fill_(0)

        self.train()
예제 #12
0
파일: ACLSTM.py 프로젝트: Aks-Dmv/AROC
    def __init__(self, num_inputs, action_space, num_options, nnWidth):
        """Build the dense trunk, the LSTM cell and the critic/policy heads.

        Args:
            num_inputs: Number of input features of the first dense layer.
            action_space: Action space object; its ``n`` attribute is read.
            num_options: Number of outputs of both the critic and the
                action-policy head.
            nnWidth: Width of the hidden layers and of the LSTM hidden state.
        """
        super(AClstm, self).__init__()
        self.module_list = nn.ModuleList()

        # Dense trunk; the last layer widens to 2 * nnWidth for the LSTM.
        self.lin1 = nn.Linear(num_inputs, nnWidth)
        self.module_list += [self.lin1]
        self.lin2 = nn.Linear(nnWidth, nnWidth)
        self.module_list += [self.lin2]
        self.lin3 = nn.Linear(nnWidth, 2 * nnWidth)
        self.module_list += [self.lin3]

        self.lstm = nn.LSTMCell(2 * nnWidth, nnWidth)
        self.module_list += [self.lstm]
        # NOTE(review): num_outputs is not used below; actionpolicy is sized
        # by num_options. Confirm whether that is intended.
        num_outputs = action_space.n
        self.critic_linear = nn.Linear(nnWidth, num_options)
        self.module_list += [self.critic_linear]
        self.actionpolicy = nn.Linear(nnWidth, num_options)
        self.module_list += [self.actionpolicy]

        # Generic init must run BEFORE the layer-specific init below:
        # weights_init is applied to every registered sub-module and would
        # otherwise overwrite the action-policy weights set by norm_col_init.
        self.apply(weights_init)

        self.actionpolicy.weight.data = norm_col_init(
            self.actionpolicy.weight.data, 0.01)
        self.actionpolicy.bias.data.fill_(0)

        # Unit-scale weights and zero biases for the trunk and the critic.
        for layer in (self.lin1, self.lin2, self.lin3, self.critic_linear):
            layer.weight.data = norm_col_init(layer.weight.data, 1.0)
            layer.bias.data.fill_(0)

        self.lstm.bias_ih.data.fill_(0)
        self.lstm.bias_hh.data.fill_(0)

        self.train()
예제 #13
0
    def __init__(self, input_dim, action_dim):
        """Create the four dense layers of the value network.

        Args:
            input_dim: Number of state features.
            action_dim: Number of action features; the first layer takes
                ``input_dim + action_dim`` inputs.
        """
        super(ValueNet, self).__init__()

        # Hidden layers: create, then re-initialise the weights, one layer
        # at a time.
        hidden1 = nn.Linear(input_dim + action_dim, 512)
        hidden1.weight.data = norm_col_init(hidden1.weight.data, 1)
        self.fc1 = hidden1

        hidden2 = nn.Linear(512, 512)
        hidden2.weight.data = norm_col_init(hidden2.weight.data, 1)
        self.fc2 = hidden2

        hidden3 = nn.Linear(512, 256)
        hidden3.weight.data = norm_col_init(hidden3.weight.data, 1)
        self.fc3 = hidden3

        # The single-output layer gets uniform weights in [-EPS, EPS].
        output = nn.Linear(256, 1)
        output.weight.data.uniform_(-EPS, EPS)
        self.fc4 = output
예제 #14
0
    def __init__(self, input_dim, num=1):
        """Create a single linear critic head.

        Args:
            input_dim: Number of input features.
            num: Number of outputs of the head.
        """
        super(ValueNet, self).__init__()

        # Weights re-initialised with scale 0.1, bias zeroed.
        critic = nn.Linear(input_dim, num)
        critic.weight.data = norm_col_init(critic.weight.data, 0.1)
        critic.bias.data.fill_(0)
        self.critic_linear = critic
예제 #15
0
    def __init__(self, head_dim, args):
        """Create the three dense layers of the gate.

        Args:
            head_dim: Number of input features of the first layer.
            args: Not used by this constructor.
        """
        super(Gate, self).__init__()
        gate_input_dim = head_dim
        self.feature_dim = 256

        # Every layer gets scale-0.1 weights and a zero bias, and is
        # initialised before the next layer is created.
        fc1 = nn.Linear(gate_input_dim, self.feature_dim)
        fc1.weight.data = norm_col_init(fc1.weight.data, 0.1)
        fc1.bias.data.fill_(0)
        self.gate_fc1 = fc1

        fc2 = nn.Linear(self.feature_dim, self.feature_dim)
        fc2.weight.data = norm_col_init(fc2.weight.data, 0.1)
        fc2.bias.data.fill_(0)
        self.gate_fc2 = fc2

        # Two outputs.
        fc3 = nn.Linear(self.feature_dim, 2)
        fc3.weight.data = norm_col_init(fc3.weight.data, 0.1)
        fc3.bias.data.fill_(0)
        self.gate_fc3 = fc3
예제 #16
0
    def __init__(self, observation_space, action_space, n_frames):
        """Build the four-layer MLP trunk with critic and two actor heads.

        Args:
            observation_space: Observation space object; ``shape[0]`` is the
                number of input features.
            action_space: Action space object, stored on the instance;
                ``shape[0]`` is the size of both actor outputs.
            n_frames: Not used by this constructor.
        """
        super(A3C_MLP, self).__init__()
        self.action_space = action_space

        # A frozen 1x1 linear layer; presumably used as a step counter that
        # travels with the module's state (confirm against the callers).
        self.training_steps = nn.Linear(1, 1)
        self.training_steps.weight.requires_grad = False
        self.training_steps.bias.requires_grad = False

        # One slope shared by the activations and the gain computation, so
        # the weight scaling matches the non-linearity that is actually used.
        negative_slope = 0.1

        self.fc1 = nn.Linear(observation_space.shape[0], 256)
        self.lrelu1 = nn.LeakyReLU(negative_slope)
        self.fc2 = nn.Linear(256, 256)
        self.lrelu2 = nn.LeakyReLU(negative_slope)
        self.fc3 = nn.Linear(256, 128)
        self.lrelu3 = nn.LeakyReLU(negative_slope)
        self.fc4 = nn.Linear(128, 128)
        self.lrelu4 = nn.LeakyReLU(negative_slope)

        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, action_space.shape[0])
        self.actor_linear2 = nn.Linear(128, action_space.shape[0])

        self.apply(weights_init_mlp)
        # Reset the counter layer after the generic init.
        self.training_steps.weight.data = torch.Tensor([0])
        self.training_steps.bias.data = torch.Tensor([0])

        # calculate_gain defaults to a slope of 0.01 for 'leaky_relu'; pass
        # the slope of the layers above explicitly.
        lrelu = nn.init.calculate_gain('leaky_relu', negative_slope)
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            dense.weight.data.mul_(lrelu)

        # Small weights for both actor outputs, unit scale for the critic.
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, std in head_scales:
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)
        self.train()

        # Counters initialised to zero; they are not updated here.
        self.success_num = 0
        self.done_nums = 0
예제 #17
0
    def __init__(self, pca_dim=PCA[str(PCA_PERCENTAGE)], classes=CLASSES):
        """Create the single linear layer of the classifier.

        Args:
            pca_dim: Number of input features; defaults to the ``PCA`` entry
                for the configured ``PCA_PERCENTAGE``.
            classes: Number of outputs; defaults to ``CLASSES``.
        """
        super(SVM, self).__init__()

        self.fc1 = nn.Linear(pca_dim, classes)

        # Generic init, then unit-scale weights and a zero bias.
        self.apply(weights_init)
        layer = self.fc1
        layer.weight.data = norm_col_init(layer.weight.data, 1.0)
        layer.bias.data.fill_(0)

        self.train()
예제 #18
0
 def __init__(self, input_dim, head_name, num=1):
     """Create the critic head, noisy or plain depending on ``head_name``.

     Args:
         input_dim: Number of input features.
         head_name: Head description; a name containing ``'ns'`` selects a
             ``NoisyLinear`` head.
         num: Number of outputs of the head.
     """
     super(ValueNet, self).__init__()
     self.noise = 'ns' in head_name
     if self.noise:
         self.critic_linear = NoisyLinear(input_dim, num, sigma_init=0.017)
     else:
         # Plain linear head with scale-0.1 weights and a zero bias.
         critic = nn.Linear(input_dim, num)
         critic.weight.data = norm_col_init(critic.weight.data, 0.1)
         critic.bias.data.fill_(0)
         self.critic_linear = critic
예제 #19
0
    def __init__(self, input_dim, action_space, head_name):
        """Create the two actor output layers for the given action space.

        Args:
            input_dim: Number of input features of both layers.
            action_space: Action space object; ``n`` is read when
                ``head_name`` contains ``'discrete'``, ``shape[0]``
                otherwise.
            head_name: Head description, stored on the instance.
        """
        super(PolicyNet, self).__init__()
        self.head_name = head_name

        # Anything not marked 'discrete' is treated as continuous.
        self.continuous = 'discrete' not in head_name
        if self.continuous:
            num_outputs = action_space.shape[0]
        else:
            num_outputs = action_space.n

        self.actor_linear = nn.Linear(input_dim, num_outputs)
        self.actor_linear2 = nn.Linear(input_dim, num_outputs)

        # Scale-0.1 weights and zero biases for both layers.
        for actor in (self.actor_linear, self.actor_linear2):
            actor.weight.data = norm_col_init(actor.weight.data, 0.1)
            actor.bias.data.fill_(0)
예제 #20
0
    def __init__(self, num_inputs, num_outputs):
        """Create the conv encoder, attention layers, LSTM cell and heads.

        Args:
            num_inputs: Not used by this constructor (the first convolution
                is fixed to one input channel).
            num_outputs: Number of outputs of the actor head.
        """
        super(A3Clstm, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 32, 3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(32, 64, 3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(64, 512, 3, stride=1, padding=1)

        # Attention projections, sized by ctx_dim[1] (defined elsewhere).
        ctx_size = ctx_dim[1]
        self.Wai = nn.Linear(ctx_size, ctx_size, bias=False)
        self.Wh = nn.Linear(512, ctx_size, bias=False)
        self.att = nn.Linear(ctx_size, 1)

        self.lstm = nn.LSTMCell(ctx_size, 512)
        self.critic_linear = nn.Linear(512, 1)
        self.actor_linear = nn.Linear(512, num_outputs)

        self.apply(weights_init)

        # The two bias-free projections only need their weights re-initialised.
        for projection in (self.Wai, self.Wh):
            projection.weight.data = norm_col_init(
                projection.weight.data, 1.0)

        # Layers with a bias: re-initialise the weights and zero the bias.
        biased_scales = (
            (self.att, 1.0),
            (self.actor_linear, 0.01),
            (self.critic_linear, 1.0),
        )
        for layer, std in biased_scales:
            layer.weight.data = norm_col_init(layer.weight.data, std)
            layer.bias.data.fill_(0)

        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #21
0
    def __init__(self, num_inputs, action_space, n_frames):
        """Build the four-layer MLP trunk, the heads and per-parameter rates.

        Args:
            num_inputs: Number of input features of the first dense layer.
            action_space: Action space object; ``shape[0]`` is the size of
                both actor outputs.
            n_frames: Used only to compute ``self.m1 = n_frames * 128``.
        """
        super(A3C_MLP, self).__init__()

        # One slope shared by the activations and the gain computation, so
        # the weight scaling matches the non-linearity that is actually used.
        negative_slope = 0.1

        self.fc1 = nn.Linear(num_inputs, 256)
        self.lrelu1 = nn.LeakyReLU(negative_slope)
        self.fc2 = nn.Linear(256, 256)
        self.lrelu2 = nn.LeakyReLU(negative_slope)
        self.fc3 = nn.Linear(256, 128)
        self.lrelu3 = nn.LeakyReLU(negative_slope)
        self.fc4 = nn.Linear(128, 128)
        self.lrelu4 = nn.LeakyReLU(negative_slope)

        self.m1 = n_frames * 128
        num_outputs = action_space.shape[0]
        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, num_outputs)
        self.actor_linear2 = nn.Linear(128, num_outputs)

        self.apply(weights_init_mlp)

        # calculate_gain defaults to a slope of 0.01 for 'leaky_relu'; pass
        # the slope of the layers above explicitly.
        lrelu = nn.init.calculate_gain('leaky_relu', negative_slope)
        for dense in (self.fc1, self.fc2, self.fc3, self.fc4):
            dense.weight.data.mul_(lrelu)

        # Head biases are drawn from a zero-mean normal whose std matches the
        # weight scale (they are deliberately not zeroed here).
        head_scales = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, std in head_scales:
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.normal_(0, std)

        self.train()

        # One random learning rate in [0, 0.001) per parameter tensor.
        self.learning_rates = torch.rand(len(list(self.parameters()))) * 0.001
예제 #22
0
    def __init__(self, input_dim, head_name, num=1, device=torch.device('cpu')):
        """Create the critic head and, unless 'onlyJ' is set, the attention.

        Args:
            input_dim: Feature size; stored as ``feature_dim``.
            head_name: Head description, stored on the instance; the
                substrings ``'ns'`` and ``'onlyJ'`` are inspected.
            num: Number of outputs of the critic head.
            device: Device stored on the instance and passed to the
                attention layer.
        """
        super(AMCValueNet, self).__init__()
        self.head_name = head_name
        self.device = device

        # NOTE(review): this check is independent of the one below, and both
        # arms of that one replace critic_linear and reset noise to False, so
        # the NoisyLinear head built here never survives construction.
        # Confirm whether an if/elif/else chain was intended.
        if 'ns' in head_name:
            self.noise = True
            self.critic_linear = NoisyLinear(input_dim, num, sigma_init=0.017)

        # 'onlyJ' heads take input_dim features and have no attention layer;
        # all other heads take twice as many features and add attention.
        joint_only = 'onlyJ' in head_name
        self.noise = False
        critic_inputs = input_dim if joint_only else 2 * input_dim
        self.critic_linear = nn.Linear(critic_inputs, num)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 0.1)
        self.critic_linear.bias.data.fill_(0)

        if not joint_only:
            self.attention = AttentionLayer(input_dim, input_dim, device)
        self.feature_dim = input_dim
예제 #23
0
    def __init__(self,
                 num_inputs,
                 action_space,
                 quantile_embedding_dim=64,
                 num_quantiles=32):
        """Create the conv encoder, quantile layers and the critic head.

        Args:
            num_inputs: Number of channels of the input image.
            action_space: Action space object; its ``n`` attribute is the
                number of critic outputs.
            quantile_embedding_dim: Size of the quantile embedding; stored on
                the instance and used as the input size of
                ``quantile_linear``.
            num_quantiles: Number of quantiles, stored on the instance.
        """
        super(A3Clstm, self).__init__()
        self.quantile_embedding_dim = quantile_embedding_dim
        self.num_quantiles = num_quantiles

        # Shape comments assume an input of shape [1, 1, 80, 80].
        self.conv1 = nn.Conv2d(num_inputs, 32, 5, stride=1,
                               padding=2)  # [1, 32, 80, 80]
        self.maxp1 = nn.MaxPool2d(2, 2)  # [1, 32, 40, 40]
        self.conv2 = nn.Conv2d(32, 32, 5, stride=1,
                               padding=1)  # [1, 32, 38, 38]
        self.maxp2 = nn.MaxPool2d(2, 2)  # [1, 32, 19, 19]
        self.conv3 = nn.Conv2d(32, 64, 4, stride=1, padding=1)
        self.maxp3 = nn.MaxPool2d(2, 2)  # [1, 64, 9, 9]
        self.conv4 = nn.Conv2d(64, 64, 3, stride=1, padding=1)
        self.maxp4 = nn.MaxPool2d(2, 2)  # [1, 64, 4, 4]

        # This variant has no LSTM cell and no actor head; the critic emits
        # one value per action.
        num_outputs = action_space.n
        self.critic_linear = nn.Linear(512, num_outputs)

        # Sized by the constructor argument instead of a hard-coded 64, so a
        # non-default embedding size yields a matching layer.
        self.quantile_linear = nn.Linear(quantile_embedding_dim, 1024)
        self.middle_linear = nn.Linear(1024, 512)

        # Generic init, then scale the conv weights by the ReLU gain.
        self.apply(weights_init)
        relu_gain = nn.init.calculate_gain('relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(relu_gain)

        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 0.01)
        self.critic_linear.bias.data.fill_(0)

        # Both quantile layers use unif_col_init with std 1/sqrt(3).
        for layer in (self.quantile_linear, self.middle_linear):
            layer.weight.data = unif_col_init(
                layer.weight.data, std=1.0 / np.sqrt(3.0))
            layer.bias.data.fill_(0)

        self.train()
예제 #24
0
def process_output(rnn_out, outputs, a_size, num_units):
    """
    Build the policy and value heads on top of the RNN output.

    An int32 placeholder for the chosen actions is created together with
    its one-hot encoding over ``a_size`` classes.  Two fully connected
    layers are then attached to ``rnn_out``: a softmax policy head with
    ``a_size`` units and a linear value head with one unit.  Both are
    created with ``biases_initializer=None``.

    ``outputs`` and ``num_units`` are accepted but not used in this body.

    Returns the tuple ``(actions, actions_onehot, policy, value)``.
    """

    def _dense_head(units, activation, init_std):
        # Shared recipe for both heads: column-normalised weight init and
        # no bias initializer.
        return slim.fully_connected(
            rnn_out,
            units,
            activation_fn=activation,
            weights_initializer=ut.norm_col_init(init_std),
            biases_initializer=None)

    # Actions taken, and their one-hot form
    actions = tf.placeholder(dtype=tf.int32, shape=[None])
    actions_onehot = tf.one_hot(actions, a_size, dtype=tf.float32)

    # Policy head is created before the value head, as in the original
    # layer creation order.
    policy = _dense_head(a_size, tf.nn.softmax, 0.01)
    value = _dense_head(1, None, 1.0)

    return actions, actions_onehot, policy, value
예제 #25
0
    def __init__(self, num_inputs, action_space, n_frames):
        """Assemble the MLP + LSTM actor-critic network and initialise it.

        Args:
            num_inputs: input width of the first fully connected layer.
            action_space: object whose ``shape[0]`` gives the output width
                of both actor heads.
            n_frames: the LSTM cell takes ``n_frames * 128`` inputs.
        """
        super(A3C_MLP, self).__init__()

        # Four Linear + LeakyReLU(0.1) stages, registered in the order
        # fc1, lrelu1, ..., fc4, lrelu4.
        layer_widths = (num_inputs, 256, 256, 128, 128)
        for idx in range(1, len(layer_widths)):
            setattr(self, 'fc%d' % idx,
                    nn.Linear(layer_widths[idx - 1], layer_widths[idx]))
            setattr(self, 'lrelu%d' % idx, nn.LeakyReLU(0.1))

        # Recurrent core
        self.m1 = n_frames * 128
        self.lstm = nn.LSTMCell(self.m1, 128)

        # One critic head and two actor heads on the 128-wide LSTM output
        n_actions = action_space.shape[0]
        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, n_actions)
        self.actor_linear2 = nn.Linear(128, n_actions)

        # Generic init first, then rescale the hidden layers by the
        # leaky-ReLU gain.
        # NOTE(review): the gain is requested without a slope argument
        # while the activations use 0.1 - confirm this is intended.
        self.apply(weights_init_mlp)
        gain = nn.init.calculate_gain('leaky_relu')
        for hidden in (self.fc1, self.fc2, self.fc3, self.fc4):
            hidden.weight.data.mul_(gain)

        # Column-normalised weights and zero biases for the output heads
        head_specs = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, std in head_specs:
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        # Zero both LSTM bias vectors
        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #26
0
    def __init__(self, num_inputs, action_space):
        """Assemble the Conv1d + LSTM actor-critic network and initialise it.

        Args:
            num_inputs: number of input channels of the first convolution.
            action_space: object whose ``shape[0]`` gives the output width
                of both actor heads.
        """
        super(A3C_CONV, self).__init__()

        # Convolutional stack, each layer paired with a LeakyReLU(0.1)
        self.conv1 = nn.Conv1d(
            in_channels=num_inputs, out_channels=32, kernel_size=3,
            stride=1, padding=1)
        self.lrelu1 = nn.LeakyReLU(0.1)
        self.conv2 = nn.Conv1d(
            in_channels=32, out_channels=32, kernel_size=3,
            stride=1, padding=1)
        self.lrelu2 = nn.LeakyReLU(0.1)
        self.conv3 = nn.Conv1d(
            in_channels=32, out_channels=64, kernel_size=2,
            stride=1, padding=1)
        self.lrelu3 = nn.LeakyReLU(0.1)
        self.conv4 = nn.Conv1d(
            in_channels=64, out_channels=64, kernel_size=1, stride=1)
        self.lrelu4 = nn.LeakyReLU(0.1)

        # Recurrent core; the input width of 1600 is hard-coded
        # (presumably the flattened conv output - TODO confirm).
        self.lstm = nn.LSTMCell(1600, 128)

        # One critic head and two actor heads on the 128-wide LSTM output
        n_actions = action_space.shape[0]
        self.critic_linear = nn.Linear(128, 1)
        self.actor_linear = nn.Linear(128, n_actions)
        self.actor_linear2 = nn.Linear(128, n_actions)

        # Generic init first, then rescale the conv weights by the
        # leaky-ReLU gain.
        # NOTE(review): the gain is requested without a slope argument
        # while the activations use 0.1 - confirm this is intended.
        self.apply(weights_init)
        gain = nn.init.calculate_gain('leaky_relu')
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            conv.weight.data.mul_(gain)

        # Column-normalised weights and zero biases for the output heads
        head_specs = (
            (self.actor_linear, 0.01),
            (self.actor_linear2, 0.01),
            (self.critic_linear, 1.0),
        )
        for head, std in head_specs:
            head.weight.data = norm_col_init(head.weight.data, std)
            head.bias.data.fill_(0)

        # Zero both LSTM bias vectors
        for lstm_bias in (self.lstm.bias_ih, self.lstm.bias_hh):
            lstm_bias.data.fill_(0)

        self.train()
예제 #27
0
    def __init__(self, pca_dim=453, classes=76):
        """Create the five-layer fully connected network and initialise it.

        Args:
            pca_dim: input width of the first layer.
            classes: output width of the last layer.
        """
        super(NET, self).__init__()

        # fc1..fc5 with widths pca_dim-128-256-256-128-classes,
        # registered in numeric order.
        dims = (pca_dim, 128, 256, 256, 128, classes)
        for idx, (n_in, n_out) in enumerate(zip(dims[:-1], dims[1:]),
                                            start=1):
            setattr(self, 'fc%d' % idx, nn.Linear(n_in, n_out))

        self.apply(weights_init)

        # Every layer then gets column-normalised weights (std 1.0) and a
        # zero bias, processed from fc1 through fc5.
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4, self.fc5):
            layer.weight.data = norm_col_init(layer.weight.data, 1.0)
            layer.bias.data.fill_(0)

        self.train()
예제 #28
0
    def __init__(self,
                 outdim,
                 action_space,
                 lstm_out=128,
                 head_name='cnn_lstm',
                 stack_frames=1):
        """Create the actor and critic output layers.

        Args:
            outdim: feature width used when ``head_name`` has no 'lstm'.
            action_space: ``.n`` is read when ``head_name`` contains
                'discrete', otherwise ``.shape[0]``.
            lstm_out: feature width used when ``head_name`` contains 'lstm'.
            head_name: substrings 'lstm', 'discrete' and 'mc' select the
                feature width, the action count and the critic width.
            stack_frames: accepted but not used in this constructor.
        """
        super(Policy, self).__init__()
        self.head_name = head_name

        # Input width of every head
        feature_dim = lstm_out if 'lstm' in self.head_name else outdim

        # Actor: two parallel linear heads of equal size
        num_outputs = (action_space.n if 'discrete' in head_name
                       else action_space.shape[0])
        self.actor_linear = nn.Linear(feature_dim, num_outputs)
        self.actor_linear2 = nn.Linear(feature_dim, num_outputs)
        for actor_head in (self.actor_linear, self.actor_linear2):
            actor_head.weight.data = norm_col_init(
                actor_head.weight.data, 0.1)
            actor_head.bias.data.fill_(0)

        # Critic: one output per action for 'mc' heads, otherwise a scalar
        critic_width = num_outputs if 'mc' in head_name else 1
        self.critic_linear = nn.Linear(feature_dim, critic_width)
        self.critic_linear.weight.data = norm_col_init(
            self.critic_linear.weight.data, 0.1)
        self.critic_linear.bias.data.fill_(0)
예제 #29
0
    def __init__(self,
                 obs_space,
                 action_space,
                 rnn_out=128,
                 head_name='cnn_lstm',
                 stack_frames=1,
                 dim_action_tracker=-1,
                 device=None):
        """Build the encoder, optional recurrent cell and actor/critic heads.

        Args:
            obs_space: forwarded to the perception encoder.
            action_space: forwarded to ``PolicyNet``.
            rnn_out: hidden size of the LSTM/GRU cell.
            head_name: substrings select components: 'cnn' / 'icml' /
                'maze' choose the encoder, 'lstm' / 'gru' add a recurrent
                cell.  Also forwarded to ``PolicyNet``.
            stack_frames: forwarded to the perception encoder.
            dim_action_tracker: input width of ``fc_action_tracker``.  A
                value > 0 also enables the auxiliary reward head.  With a
                negative value (the default) no projection layer is built
                and ``fc_action_tracker`` is ``None``.
            device: forwarded to ``PolicyNet``.
        """
        super(TAT, self).__init__()
        self.sub_task = dim_action_tracker > 0
        self.head_name = head_name

        # Later matches override earlier ones when several keys are present.
        if 'cnn' in head_name:
            self.encoder = perception.CNN_simple(obs_space, stack_frames)
        if 'icml' in head_name:
            self.encoder = perception.ICML(obs_space, stack_frames)
        if 'maze' in head_name:
            self.encoder = perception.CNN_maze(obs_space, stack_frames)
        feature_dim = self.encoder.outdim

        # Optional recurrent cell; both variants are stored as self.lstm.
        if 'lstm' in head_name:
            self.lstm = nn.LSTMCell(feature_dim, rnn_out)
            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
            feature_dim = rnn_out
        if 'gru' in head_name:
            self.lstm = nn.GRUCell(feature_dim, rnn_out)
            self.lstm.bias_ih.data.fill_(0)
            self.lstm.bias_hh.data.fill_(0)
            feature_dim = rnn_out

        #  create actor
        self.actor = PolicyNet(feature_dim, action_space, head_name, device)
        self.critic = ValueNet(feature_dim)

        # nn.Linear cannot be created with a negative in_features, so the
        # default dim_action_tracker=-1 used to make construction fail
        # here.  Only build the projection when the width is valid.
        if dim_action_tracker >= 0:
            self.fc_action_tracker = nn.Linear(dim_action_tracker,
                                               self.encoder.outdim)
            weights_init_mlp(self.fc_action_tracker)
        else:
            self.fc_action_tracker = None

        # create sub-task
        if self.sub_task:
            self.reward_aux = nn.Linear(feature_dim, 1)
            self.reward_aux.weight.data = norm_col_init(
                self.reward_aux.weight.data, 0.01)
            self.reward_aux.bias.data.fill_(0)

        # NOTE(review): this runs after the layer-specific initialisation
        # above; if weights_init handles Linear modules it overrides those
        # values - confirm the ordering is intended.
        self.apply(weights_init)
        self.train()
예제 #30
0
    def __init__(self, input_dim, action_space, head_name, device):
        """Create the actor output layer for a discrete action space.

        Args:
            input_dim: input width of the actor layer.
            action_space: object whose ``.n`` gives the number of outputs.
            head_name: stored on the instance; the substring 'ns' selects
                the noisy variant of the actor layer.
            device: stored on the instance.
        """
        super(PolicyNet, self).__init__()
        self.head_name = head_name
        self.device = device
        num_outputs = action_space.n

        if 'ns' in head_name:
            # Noisy head: no explicit weight/bias init is applied here.
            self.noise = True
            self.actor_linear = NoisyLinear(input_dim, num_outputs, sigma_init=0.017)
        else:
            self.noise = False
            self.actor_linear = nn.Linear(input_dim, num_outputs)

            # init layers
            # Column-normalised weights (std 0.1) and a zero bias, applied
            # to the plain linear head only.
            self.actor_linear.weight.data = norm_col_init(self.actor_linear.weight.data, 0.1)
            self.actor_linear.bias.data.fill_(0)