Example #1
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * advantages
        actor_loss = -K.mean(eligibility)

        entropy = K.mean(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.mean(entropy)

        loss = actor_loss + 0.01 * entropy

        optimizer = RMSprop(lr=self.actor_lr,
                            rho=0.99,
                            epsilon=0.00001,
                            decay=0.99,
                            clipnorm=0.5)
        # optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)

        return train
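For orientation, here is a hedged usage sketch (not part of the original example) of how a train function returned by K.function is typically called: the argument list matches the placeholders in order, the loss comes back as a one-element list, and the RMSprop updates are applied as a side effect. The `agent` object, the batch size, and the `state_size` attribute below are assumptions made for illustration.

import numpy as np

# Assumed setup: `agent` is an instance of the class above with a built `actor` model.
train_actor = agent.actor_optimizer()

states = np.random.rand(32, agent.state_size).astype('float32')   # assumed state batch
actions = np.eye(agent.action_size)[np.random.randint(agent.action_size, size=32)]
advantages = np.random.randn(32).astype('float32')

loss = train_actor([states, actions, advantages])[0]  # one gradient step on the actor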
Example #2
    def actor_optimizer(self):
        """
        Updates the actor network (policy network)
        """
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))
        policy = self.actor.output

        action_prob = K.sum(action * policy, axis=1)
        # Cross-entropy loss for the policy
        cross_entropy = K.log(action_prob + 1e-10) * advantages
        cross_entropy = -K.sum(cross_entropy)

        # Entropy loss term that keeps the actor exploring the environment continuously
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        # The final loss is the sum of the two loss terms
        loss = cross_entropy + 0.01 * entropy

        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)
        return train
    def optimizer(self):

        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        # the output tensor for the state-action pairs
        py_x = self.q_duelling_part.output

        a_one_hot = K.one_hot(a, 3)
        q_value = K.sum(py_x * a_one_hot, axis=1)

        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part

        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)

        updates = optimizer.get_updates(self.q_duelling_part.trainable_weights,
                                        [], loss)

        train = K.function([self.q_duelling_part.input, a, y], [loss],
                           updates=updates)

        return train
Example #4
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))

        policy = self.actor.output

        # actor loss
        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * advantages
        actor_loss = -K.sum(eligibility)

        # entropy-loss to encourage exploration
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)  # shape (None,)
        entropy = K.sum(entropy)  # scalar

        # total loss
        loss = actor_loss + 0.01 * entropy
        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)

        return train
Example #5
    def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        prediction = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(prediction * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train
Example #6
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))

        policy = self.actor.output

        # Cross-entropy loss for the policy
        action_prob = K.sum(action * policy, axis=1)
        cross_entropy = K.log(action_prob + 1e-10) * advantages
        cross_entropy = -K.sum(cross_entropy)

        # Entropy term to keep exploration going
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        # The final loss is the sum of the two terms
        loss = cross_entropy + 0.01 * entropy

        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)
        return train
Example #7
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))

        policy = self.actor.output

        # Policy cross-entropy loss function
        action_prob = K.sum(action * policy, axis=1)
        cross_entropy = K.log(action_prob + 1e-10) * advantages
        cross_entropy = -K.sum(cross_entropy)

        # Entropy loss for continuous exploration
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        # Final loss combining the policy and entropy terms
        loss = cross_entropy + 0.01 * entropy

        optimizer = RMSprop(lr=self.actor_lr,
                            rho=0.99,
                            epsilon=0.01,
                            clipnorm=40)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)
        return train
Example #8
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=(None,))

        policy = self.actor.output

        # Cross-entropy loss for the policy
        action_prob = K.sum(action * policy, axis=1)
        cross_entropy = K.log(action_prob + 1e-10) * advantages
        cross_entropy = -K.sum(cross_entropy)

        # Entropy loss term so exploration continues
        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        # The final loss is the sum of the two loss terms
        loss = cross_entropy + 0.01 * entropy

        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss],
                           updates=updates)
        return train
Example #9
    def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output  # the model outputs the Q-value of each action

        a_one_hot = K.one_hot(a, self.action_size)
        # the one-hot mask picks out the Q-value of the action that was taken
        q_value = K.sum(py_x * a_one_hot, axis=1)
        # absolute error between target and prediction, used to build the Huber loss
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        # quadratic for small errors, linear for large ones (Huber loss)
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        # get_updates builds the update ops for the given parameters and loss
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)

        # instantiate a Keras function: (list of input placeholders, list of output
        # tensors, list of update ops); the loss is returned so it can be logged
        train = K.function([self.model.input, a, y], [loss],
                           updates=updates)  # calling it performs one gradient step

        return train
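As a note on the pattern above (and in the other DQN-style examples): the quadratic/linear split is the Huber loss with delta = 1, i.e. 0.5 * e^2 for |e| <= 1 and |e| - 0.5 otherwise. A minimal NumPy check, illustrative only and not from the source:

import numpy as np

error = np.array([0.2, 0.8, 1.5, 3.0])
quadratic_part = np.clip(error, 0.0, 1.0)
linear_part = error - quadratic_part
piecewise = 0.5 * quadratic_part ** 2 + linear_part

huber = np.where(error <= 1.0, 0.5 * error ** 2, error - 0.5)
assert np.allclose(piecewise, huber)  # both give [0.02, 0.32, 1.0, 2.5]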
Example #10
def wgan_loss(inputshape, noiseshape, generator, discriminator, K):

    opt = RMSprop(lr=5e-5, clipvalue=0.01)

    realimg = Input(shape=inputshape)
    noise = Input(shape=noiseshape)
    fakeimg = generator(noise)
    d_real = discriminator(realimg)
    d_fake = discriminator(fakeimg)
    d_loss1 = K.mean(d_real, axis=-1)
    d_loss2 = K.mean(d_fake, axis=-1)
    d_loss = -d_loss1 + d_loss2
    d_training_updates = opt.get_updates(discriminator.trainable_weights, [], d_loss)
    d_train = K.function([realimg, noise], [d_loss], d_training_updates)
    g_loss = -K.mean(d_fake, axis=-1)
    g_training_updates = opt.get_updates(generator.trainable_weights, [], g_loss)
    g_train = K.function([noise], [g_loss], g_training_updates)

    return d_train, g_train
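A hedged training-loop sketch showing how the two returned functions are usually alternated in WGAN training, with several critic updates per generator update. The batch size, latent dimension, step counts, and the `sample_real_images` helper are assumptions, not part of the source.

import numpy as np

batch_size, latent_dim, n_critic = 64, 100, 5  # assumed hyperparameters

for step in range(10000):
    for _ in range(n_critic):
        real_batch = sample_real_images(batch_size)  # hypothetical data loader
        noise_batch = np.random.normal(size=(batch_size, latent_dim)).astype('float32')
        d_loss = d_train([real_batch, noise_batch])[0]
    noise_batch = np.random.normal(size=(batch_size, latent_dim)).astype('float32')
    g_loss = g_train([noise_batch])[0]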
Example #11
def _build_model(graph, state_size, skip_frames, action_size, learning_rate):
    __keras_imports()
    INPUT_SHAPE = (state_size, )  # input shape to the model
    ACTION_SIZE = action_size
    # With the functional API we need to define the inputs.
    LInput = layers.Input(INPUT_SHAPE, name='inputs')

    # "The final hidden layer is fully-connected and consists of 256 rectifier units."
    h1 = layers.Dense(64, activation='relu')(LInput)
    h2 = layers.Dense(128, activation='relu')(h1)
    rms_opt = RMSprop(lr=learning_rate, epsilon=0.1, rho=0.99)

    x1 = layers.Dense(128, activation='relu')(h2)
    x2 = layers.Dense(128, activation='relu')(h2)

    # "The output layer is a fully-connected linear layer with a single output for each valid action."
    output_actions = layers.Dense(ACTION_SIZE,
                                  activation='softmax',
                                  name='out1')(x1)

    output_value = layers.Dense(1, activation='linear', name='out2')(x2)

    pmodel = Model(inputs=[LInput], outputs=[output_actions])
    vmodel = Model(inputs=[LInput], outputs=[output_value])

    action_pl = K.placeholder(shape=(None, action_size))
    advantages_pl = K.placeholder(shape=(None, ))
    discounted_r = K.placeholder(shape=(None, ))

    weighted_actions = K.sum(action_pl * pmodel.output, axis=1)
    eligibility = K.log(weighted_actions +
                        1e-10) * K.stop_gradient(advantages_pl)
    entropy = K.sum(pmodel.output * K.log(pmodel.output + 1e-10), axis=1)
    ploss = 0.001 * entropy - K.sum(eligibility)
    updates = rms_opt.get_updates(pmodel.trainable_weights, [], ploss)
    optimizer = K.function([pmodel.input, action_pl, advantages_pl], [],
                           updates=updates)

    closs = K.mean(K.square(discounted_r - vmodel.output))
    updates2 = rms_opt.get_updates(vmodel.trainable_weights, [], closs)
    optimizer2 = K.function([vmodel.input, discounted_r], [], updates=updates2)

    return (pmodel, vmodel, optimizer, optimizer2)
Example #12
    def _compile_learning(self):
        # Tensor Variables
        s = K.placeholder(shape=tuple([None] + [self.history_len] +
                                      self.state_shape))
        a = K.placeholder(ndim=1, dtype='int32')
        r = K.placeholder(ndim=1, dtype='float32')
        s2 = K.placeholder(shape=tuple([None] + [self.history_len] +
                                       self.state_shape))
        t = K.placeholder(ndim=1, dtype='float32')

        # Q(s, a)
        q = self.network(s / self.normalize)
        preds = slice_tensor_tensor(q, a)

        # r + (1 - t) * gamma * max_a(Q'(s'))
        q2 = self.target_network(s2 / self.normalize)
        if self.ddqn:
            q2_net = K.stop_gradient(self.network(s2 / self.normalize))
            a_max = K.argmax(q2_net, axis=1)
            q2_max = slice_tensor_tensor(q2, a_max)
        else:
            q2_max = K.max(q2, axis=1)

        # over-estimation correction
        if len(self.bootstrap_corr) > 0:
            q2_max -= (q2_max - np.float32(self.bootstrap_corr[1])) * (
                q2_max > self.bootstrap_corr[1])
            q2_max -= (q2_max - np.float32(self.bootstrap_corr[0])) * (
                q2_max < self.bootstrap_corr[0])

        targets = r + (np.float32(1) - t) * self.gamma * q2_max

        # Loss and Updates
        cost = clipped_sum_error(y_true=targets, y_pred=preds)
        optimizer = RMSprop(lr=self.learning_rate, rho=.95, epsilon=1e-7)
        updates = optimizer.get_updates(params=self.network.trainable_weights,
                                        loss=cost,
                                        constraints={})

        # Update Target Network
        target_updates = []
        for target_weight, network_weight in zip(
                self.target_network.trainable_weights,
                self.network.trainable_weights):
            target_updates.append(K.update(target_weight, network_weight))

        # Compiled Functions
        self._train_on_batch = K.function(inputs=[s, a, r, s2, t],
                                          outputs=[cost],
                                          updates=updates)
        self.predict_network = K.function(inputs=[s], outputs=[q])
        self.predict_target = K.function(inputs=[s2], outputs=[q2])
        self.update_weights = K.function(inputs=[],
                                         outputs=[],
                                         updates=target_updates)
Example #13
    def __update_critic__(self):
        discounted_rewards = K.placeholder(shape=(None, ))
        value = self.critic_.output

        loss = K.mean(K.square(discounted_rewards - value))

        gradient = RMSprop()
        updates = gradient.get_updates(self.critic_.trainable_weights, [], loss)
        train = K.function([self.critic_.input, discounted_rewards],
                           [self.critic_.output], updates=updates)

        return train
Example #14
    def critic_optimizer(self):
        discounted_reward = K.placeholder(shape=(None, ))

        value = self.critic.output

        loss = K.mean(K.square(discounted_reward - value))

        optimizer = RMSprop(lr=self.critic_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, discounted_reward], [loss], updates=updates)
        return train
Example #16
    def _compile_learning(self):
        s = K.placeholder(shape=tuple([None] + [self.history_len] + self.state_shape))
        a = K.placeholder(ndim=1, dtype='int32')
        r = K.placeholder(ndim=2, dtype='float32')
        s2 = K.placeholder(shape=tuple([None] + [self.history_len] + self.state_shape))
        t = K.placeholder(ndim=1, dtype='float32')

        updates = []
        costs = 0
        qs = []
        q2s = []
        for i in range(len(self.networks)):
            local_s = s
            local_s2 = s2
            if self.remove_features:
                local_s = self._remove_features(local_s, i)
                local_s2 = self._remove_features(local_s2, i)
            qs.append(self.networks[i](local_s))
            q2s.append(self.target_networks[i](local_s2))
            if self.use_hra:
                cost = self._compute_cost(qs[-1], a, r[:, i], t, q2s[-1])
                optimizer = RMSprop(lr=self.learning_rate, rho=.95, epsilon=1e-7)
                updates += optimizer.get_updates(params=self.networks[i].trainable_weights, loss=cost, constraints={})
                costs += cost
        if not self.use_hra:
            q = sum(qs)
            q2 = sum(q2s)
            summed_reward = K.sum(r, axis=-1)
            cost = self._compute_cost(q, a, summed_reward, t, q2)
            optimizer = RMSprop(lr=self.learning_rate, rho=.95, epsilon=1e-7)
            updates += optimizer.get_updates(params=self.all_params, loss=cost, constraints={})
            costs += cost

        target_updates = []
        for network, target_network in zip(self.networks, self.target_networks):
            for target_weight, network_weight in zip(target_network.trainable_weights, network.trainable_weights):
                target_updates.append(K.update(target_weight, network_weight))

        self._train_on_batch = K.function(inputs=[s, a, r, s2, t], outputs=[costs], updates=updates)
        self.predict_network = K.function(inputs=[s], outputs=qs)
        self.update_weights = K.function(inputs=[], outputs=[], updates=target_updates)
Example #17
    def _critic_optimizer(self):
        discount_prediction = K.placeholder(shape=(None,))

        value = self.cric.output

        # The loss is the squared difference between the return and the value.
        loss = K.mean(K.square(discount_prediction - value))

        optimizer = RMSprop(lr=2.5e-4, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.cric.trainable_weights, [], loss)
        train = K.function([self.cric.input, discount_prediction], [loss], updates=updates)
        return train
    def critic_optimize(self):
        target = K.placeholder(shape=(None, ))

        loss = K.mean(K.square(target - self.critic.output))

        optimizer = RMSprop(lr=self.critic_learning_rate,
                            rho=0.99,
                            epsilon=0.01)
        update = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, target], [loss], updates=update)

        return train
Example #19
		def critic_optimizer():
			R = K.placeholder(shape=(None,))
			critic = model_critic.output
			critic = K.print_tensor(critic, message='critic: ')
			Lv = K.mean(K.square(R - critic))
			Lv = K.sum(Lv)
			Lv = K.print_tensor(Lv, message='Lv: ')
			optimizer = RMSprop(lr=2.5e-4, rho=0.99, epsilon=0.01)
			#optimizer = my_optimizer_critic
			updates = optimizer.get_updates(model_critic.trainable_weights, [], Lv)
			train = K.function([model_critic.input, R], [Lv], updates=updates)
			return train		
Example #20
    def critic_optimizer2(self):
        target = K.placeholder(shape=(None,))

        loss = K.mean(K.square(target - self.critic.output))

        optimizer = RMSprop(lr=self.critic_lr * 50)
        updates = optimizer.get_updates(self.critic.trainable_weights, [],
                                        loss)
        train = K.function([self.critic.input, target], [], updates=updates)

        return train
Example #21
    def optimizer(self):
        """ The critic loss: mean squared error over discounted rewards """

        # Placeholders
        discounted_returns_placeholder = K.placeholder(name='discounted_return', shape=(None,))
        critic_loss = K.mean(K.square(discounted_returns_placeholder - self.model.output))

        # Define optimizer
        adam_critic = RMSprop(lr=self.lr, epsilon=0.1, rho=0.99)  # arbitrary choice
        pars = self.model.trainable_weights
        updates = adam_critic.get_updates(params=pars, loss=critic_loss)

        return K.function([self.model.input, discounted_returns_placeholder], [], updates=updates)
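Since this critic expects already-discounted returns as its second input, here is a small hedged helper for computing them from an episode's reward list (illustrative only, not from the source; the discount factor is an assumed value):

import numpy as np

def discount_rewards(rewards, gamma=0.99):
    # G_t = r_t + gamma * G_{t+1}, computed backwards over the episode
    returns = np.zeros(len(rewards), dtype='float32')
    running = 0.0
    for t in reversed(range(len(rewards))):
        running = rewards[t] + gamma * running
        returns[t] = running
    return returns

print(discount_rewards([1.0, 0.0, 1.0]))  # approximately [1.9801, 0.99, 1.0]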
Example #22
		def actor_optimizer():
			a_t = K.placeholder(shape=[None, ACTION_COUNT])
			A = K.placeholder(shape=(None, ))
			policy = model_actor.output
			Lpi = -K.sum(K.log(K.sum(policy*a_t, axis=1) + 1e-10) * A)
			LH = K.sum(K.sum(policy * K.log(policy + 1e-10), axis=1))
			L = Lpi + 0.01 * LH
			
			optimizer = RMSprop(lr=2.5e-4, rho=0.99, epsilon=0.01)
			#optimizer = my_optimizer
			updates = optimizer.get_updates(model_actor.trainable_weights, [], L)
			train = K.function([model_actor.input, a_t, A], [L], updates=updates)
			return train
Example #23
    def __update_actor__(self):
        action = K.placeholder(shape=(None, self.output_shape_))
        advantages = K.placeholder(shape=(None, ))
        policy = self.actor_.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)

        gradient = RMSprop()
        updates = gradient.get_updates(self.actor_.trainable_weights, [], loss)
        train = K.function([self.actor_.input, action, advantages],
                           [self.actor_.output], updates=updates)

        return train
Example #24
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantage = K.placeholder(shape=(None,))

        action_prob = K.sum(action * self.actor.output, axis=1)
        cross_entropy = K.log(action_prob) * advantage
        loss = -K.sum(cross_entropy)

        optimizer = RMSprop(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantage], [],
                           updates=updates)
        return train
Example #25
    def critic_optimizer(self):
        """
        Updates the critic network (value network)
        """
        discounted_prediction = K.placeholder(shape=(None, ))
        value = self.critic.output

        # Mean squared error loss
        loss = K.mean(K.square(discounted_prediction - value))

        optimizer = RMSprop(lr=self.critic_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.critic.trainable_weights, [],
                                        loss)
        train = K.function([self.critic.input, discounted_prediction], [loss],
                           updates=updates)
        return train
Example #26
    def __mse(self):
        '''
        Mean squared error loss
        :return: Keras function
        '''
        q_values = self._online_model.output
        # trace of taken actions
        target = K.placeholder(shape=(None, ), name='target_value')
        a_1_hot = K.placeholder(shape=(None, self._action_dim), name='chosen_actions')

        q_value = K.sum(q_values * a_1_hot, axis=1)
        squared_error = K.square(target - q_value)
        mse = K.mean(squared_error)
        optimizer = RMSprop(lr=self._lr)
        updates = optimizer.get_updates(loss=mse, params=self._online_model.trainable_weights)

        return K.function(inputs=[self._online_model.input, target, a_1_hot], outputs=[], updates=updates)
    def __build_train_fn(self):
        action_Q_placeholder = self.policy_network.output
        action_onehot_placeholder = K.placeholder(shape=(None, self.n_action))
        target_Q_placeholder = K.placeholder(shape=(None, ))

        action_Qvalue = K.sum(action_Q_placeholder * action_onehot_placeholder,
                              axis=1)
        loss = K.max(K.square(action_Qvalue - target_Q_placeholder))
        adam = Adam(lr=0.0001)
        rmsprop = RMSprop(lr=0.0001, rho=0.99)
        updates = rmsprop.get_updates(
            params=self.policy_network.trainable_weights, loss=loss)
        self.train_fn = K.function(inputs=[
            self.policy_network.input, action_onehot_placeholder,
            target_Q_placeholder
        ],
                                   outputs=[],
                                   updates=updates)
Example #28
    def build_functions(self):
        S = Input(shape=self.state_size)
        NS = Input(shape=self.state_size)
        A = Input(shape=(1, ), dtype='int32')
        R = Input(shape=(1, ), dtype='float32')
        T = Input(shape=(1, ), dtype='int32')
        self.build_model()
        self.value_fn = K.function([S], self.model(S))

        VS = self.model(S)
        VNS = disconnected_grad(self.model(NS))
        future_value = (1 - T) * VNS.max(axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = ((VS[:, A] - target)**2).mean()
        opt = RMSprop(0.0001)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        self.train_fn = K.function([S, NS, A, R, T], cost, updates=updates)
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.output_pa))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)

        actor_loss = loss + 0.01*entropy

        # optimizer = Adam(lr=0.01)
        optimizer = RMSprop(lr=2.5e-4, rho=0.99, epsilon=0.0001)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        train = K.function([self.actor.input, action, advantages], [], updates=updates)
        return train
Example #30
    def build_functions(self):
        S = Input(shape=self.state_size)
        NS = Input(shape=self.state_size)
        A = Input(shape=(1,), dtype='int32')
        R = Input(shape=(1,), dtype='float32')
        T = Input(shape=(1,), dtype='int32')
        self.build_model()
        self.value_fn = K.function([S], self.model(S))

        VS = self.model(S)
        VNS = disconnected_grad(self.model(NS))
        future_value = (1-T) * VNS.max(axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = ((VS[:, A] - target)**2).mean()
        opt = RMSprop(0.0001)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        self.train_fn = K.function([S, NS, A, R, T], cost, updates=updates)
Example #31
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        advantages = K.placeholder(shape=[None, ])

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * advantages
        actor_loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        loss = actor_loss + 0.01*entropy
        optimizer = RMSprop(lr=self.actor_lr, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss], updates=updates)

        return train
    def optimizer(self):
        a = K.placeholder(shape=(None, ), dtype='int32')
        y = K.placeholder(shape=(None, ), dtype='float32')

        py_x = self.model.output

        a_one_hot = K.one_hot(a, self.action_size)
        q_value = K.sum(py_x * a_one_hot, axis=1)
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)

        return train
    def ActorOptimizer(self):
        action = K.placeholder(shape=[None, self.actionSpace])
        advantages = K.placeholder(shape=[None, ])

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * advantages
        actor_loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)
        entropy = K.sum(entropy)

        loss = actor_loss + 0.01*entropy
        optimizer = RMSprop(lr=self.actorLearningrate, rho=0.99, epsilon=0.01)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantages], [loss], updates=updates)

        return train
Example #34
    def optimizer(self):
        a = K.placeholder(shape=(None,), dtype='int32')
        y = K.placeholder(shape=(None,), dtype='float32')

        # Q-values predicted by the model for the given history
        prediction = self.model.output

        # One-hot encode the action taken for that history,
        # e.g. action 0 becomes [1, 0, 0]
        a_one_hot = K.one_hot(a, self.action_size)

        # [1, 0, 0] * [0.32113, 0.1123, 0.00123] = [0.32113, 0, 0];
        # summing over axis 1 then gives 0.32113
        q_value = K.sum(prediction * a_one_hot, axis=1)
        # y is the target: reward + discount_factor * np.max(model.predict(next_history)).
        # The error is y - q_value; the worse the current q_value estimate, the larger it gets.
        # q_value above is our prediction (q hat).
        error = K.abs(y - q_value)

        quadratic_part = K.clip(error, 0.0, 1.0)
        linear_part = error - quadratic_part
        loss = K.mean(0.5 * K.square(quadratic_part) + linear_part)

        optimizer = RMSprop(lr=0.00025, epsilon=0.01)
        # updates is a list
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)
        train = K.function([self.model.input, a, y], [loss], updates=updates)
        """Instantiates a Keras function.

            # Arguments
                inputs: List of placeholder tensors.
                outputs: List of output tensors.
                updates: List of update ops.
                **kwargs: Passed to `tf.Session.run`.

            # Returns
                Output values as Numpy arrays.

            # Raises
                ValueError: if invalid kwargs are passed in.
            """
        return train
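To make the K.function docstring quoted above concrete, here is a hedged example call for this optimizer; the `agent` object and the 84x84x4 history shape are assumptions made for illustration:

import numpy as np

train = agent.optimizer()
history = np.random.rand(32, 84, 84, 4).astype('float32')          # assumed input batch
actions = np.random.randint(agent.action_size, size=32).astype('int32')
targets = np.random.rand(32).astype('float32')                      # r + gamma * max_a Q_target

loss = train([history, actions, targets])[0]  # applies the RMSprop updates and returns the loss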
Example #35
    def build_functions(self):
        self.build_model()
                
        S = Input(shape=(self.state_size,))
        NS = Input(shape=(self.state_size,))
        A = Input(shape=(1,), dtype='int32')
        R = Input(shape=(1,), dtype='float32')
        T = Input(shape=(1,), dtype='int32')
        
        self.value_fn = kb.function([S], [self.model(S)])

        values = self.model(S)
        next_values = self.model(NS) #disconnected_grad(self.model(NS))
        future_value = kb.cast((1-T), dtype='float32') * kb.max(next_values, axis=1, keepdims=True)
        discounted_future_value = self.discount * future_value
        target = R + discounted_future_value
        cost = kb.mean(kb.pow(values - target, 2))
        opt = RMSprop(0.0001)
        params = self.model.trainable_weights
        updates = opt.get_updates(params, [], cost)
        self.train_fn = kb.function([S, NS, A, R, T], [cost], updates=updates)
Example #36
params = layer4.params + layer3.params + layer2.params + layer1.params + layer0.params
grads = T.grad(cost, params)

# learning_rate = numpy.float32(0.01)
# updates = [
#     (param_i, param_i - learning_rate * grad_i)
#     for param_i, grad_i in zip(params, grads)
# ]

constraints_list = []
for param in params:
    constraints_list.append(identity())

rms = RMSprop()
updates = rms.get_updates(params, constraints_list, cost)

def read_image(address):
    img = Image.open(open(address))
    img = numpy.asarray(img, dtype='float32') / 256.
    # put image in 4D tensor of shape (1, 3, height, width)
    img = img.transpose(2, 0, 1).reshape(1, 3, 128, 48)
    return img


# img1 = read_image('/home/austin/Documents/Datasets/VIPeR/cam_a/001_45.bmp')
# img2 = read_image('/home/austin/Documents/Datasets/VIPeR/cam_b/091_90.bmp')
# f = theano.function([X1, X2, Y], [cost, layer2.similarity])
# y = numpy.asarray([-1], dtype='int32')
# [tmp, sim] = f(img1, img2, y)
Example #37
    def __init__(self):
        rng = numpy.random.RandomState(23455)

        self.X1 = T.tensor4('X1', dtype='float32')
        self.X2 = T.tensor4('X2', dtype='float32')
        self.Y = T.ivector('Y')

        self.layer0 = Layer.ConvMaxPool2Layer(
            rng,
            input1=self.X1,
            input2=self.X2,
            filter_shape=[25, 3, 5, 5],
            poolsize=[2, 2]
        )

        self.layer1 = Layer.ConvMaxPool2Layer(
            rng,
            input1=self.layer0.output1,
            input2=self.layer0.output2,
            filter_shape=[25, 25, 3, 3],
            poolsize=[2, 2]
        )

        self.layer2 = Layer.SecretLayer(
            rng,
            input1=self.layer1.output1,
            input2=self.layer1.output2,
            filter_shape=[25, 25, 5, 5]
        )

        # self.layer3 = Layer.MultiConvMaxPoolLayer(
        #     rng,
        #     input=self.layer2.results,
        #     filter_shape=[25, 25, 3, 3],
        #     poolsize=(2, 2)
        # )

        self.layer3 = Layer.LocalCovLayerDropout(
            rng,
            input=self.layer2.results,
            n_in=18*9*25,
            n_out=200
        )

        self.layer4 = Layer.HiddenLayerDropout(
            rng,
            train_input=self.layer3.train_output,
            test_input=self.layer3.test_output,
            # n_in=25*24*3,
            n_in=800,
            n_out=200
        )
        # self.layer2 = Layer.ConvMaxPoolLayer(
        #     rng,
        #     input=T.abs_(self.layer1.output1 - self.layer1.output2),
        #     filter_shape=[25, 25, 3, 3],
        #     poolsize=[2, 2]
        # )
        #
        # self.layer3 = Layer.HiddenLayer(
        #     rng,
        #     input=self.layer2.output,
        #     n_in=25*18*5,
        #     n_out=500
        # )

        # self.layer5 = Layer.LogisticRegression(self.layer4.output, 500, 2)
        # self.cost = self.layer5.negative_log_likelihood(self.Y)

        self.layer5 = Layer.LogisticRegressionDropout(
            train_input=self.layer4.train_output,
            test_input=self.layer4.test_output,
            n_in=200,
            n_out=2
        )
        self.cost = self.layer5.negative_log_likelihood_train(self.Y)

        self.params = self.layer5.params + self.layer4.params + self.layer3.params + self.layer2.params + self.layer1.params + self.layer0.params
        self.grads = T.grad(self.cost, self.params)

        # learning_rate = numpy.float32(0.01)
        # updates = [
        #     (param_i, param_i - learning_rate * grad_i)
        #     for param_i, grad_i in zip(params, grads)
        # ]

        constraints_list = []
        for param in self.params:
            constraints_list.append(identity())

        rms = RMSprop()
        self.updates = rms.get_updates(self.params, constraints_list, self.cost)
Example #38
    regularizers += _regularizers
    constraints += _consts
    updates += _updates
    
print('parameters:')
print(params)
print('regularizers:')
print(regularizers)
print('constraints:')
print(constraints)
print('updates:')
print(updates)

'''updates'''
optimizer = RMSprop()
_updates = optimizer.get_updates(params, constraints, train_loss)
updates += _updates

print('after RMSprop, updates:')
for update in updates:
    print(update)

train_ins = [X_train, y, weights]
test_ins = [X_test, y, weights]
predict_ins = [X_test]

'''Get functions'''
_train = K.function(train_ins, [train_loss], updates=updates)
_train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates)
_predict = K.function(predict_ins, [y_test], updates=state_updates)
_test = K.function(test_ins, [test_loss])