Example #1
def wgangp_loss(imageshape, noiseshape, generator, discriminator, K):
    
    opt = Adam(lr=1e-4, beta_1=0, beta_2=0.9)
    
    ϵ_input = K.placeholder(shape=(None,1,1,1))
    realimg = Input(shape=imageshape)
    noise = Input(shape=noiseshape)
    fakeimg = generator(noise)
    d_real = discriminator(realimg)
    d_fake = discriminator(fakeimg)
    d_loss1 = K.mean(d_real, axis=-1)
    d_loss2 = K.mean(d_fake, axis=-1)
    
    mixed_input = Input(shape=imageshape, tensor=ϵ_input * realimg + (1-ϵ_input) * fakeimg)
    
    grad_mixed = K.gradients(discriminator(mixed_input), [mixed_input])[0]
    norm_grad_mixed = K.sqrt(K.sum(K.square(grad_mixed), axis=[1,2,3]))
    grad_penalty = K.mean(K.square(norm_grad_mixed -1))
    
    d_loss = d_loss2 - d_loss1 + 10*grad_penalty
    d_training_updates = opt.get_updates(discriminator.trainable_weights,[], d_loss)
    d_train = K.function([realimg, noise, ϵ_input], [d_loss], d_training_updates)
    
    g_loss = -K.mean(d_fake, axis=-1)
    g_training_updates = opt.get_updates(generator.trainable_weights,[], g_loss)
    g_train = K.function([noise], [g_loss], g_training_updates)
    
    return d_train, g_train
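A minimal usage sketch for the returned pair of functions, assuming NumPy is imported as np; image_shape, noise_shape, x_train, batch_size and n_steps are placeholders from the caller, and the five critic updates per generator update follow the usual WGAN-GP recipe rather than anything stated above:

d_train, g_train = wgangp_loss(image_shape, noise_shape, generator, discriminator, K)
for step in range(n_steps):
    for _ in range(5):                                    # critic updates per generator update
        real = x_train[np.random.randint(0, len(x_train), batch_size)]
        z = np.random.normal(size=(batch_size,) + noise_shape)
        eps = np.random.uniform(size=(batch_size, 1, 1, 1))
        d_loss_val, = d_train([real, z, eps])
    z = np.random.normal(size=(batch_size,) + noise_shape)
    g_loss_val, = g_train([z])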
Example #2
    def _build_optimizer(self):
        """build optimizer and loss method.

        Returns:
            [actor optimizer, critic optimizer].
        """
        # actor optimizer
        actions = K.placeholder(shape=(None, 1))
        advantages = K.placeholder(shape=(None, 1))
        action_pred = self.actor.output

        entropy = K.sum(action_pred * K.log(action_pred + 1e-10), axis=1)
        closs = K.binary_crossentropy(actions, action_pred)
        actor_loss = K.mean(closs * K.flatten(advantages)) - 0.01 * entropy

        actor_optimizer = Adam(lr=self.actor_lr)
        actor_updates = actor_optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        actor_train = K.function([self.actor.input, actions, advantages], [], updates=actor_updates)

        # critic optimizer
        discounted_reward = K.placeholder(shape=(None, 1))
        value = self.critic.output

        critic_loss = K.mean(K.square(discounted_reward - value))

        critic_optimizer = Adam(lr=self.critic_lr)
        critic_updates = critic_optimizer.get_updates(self.critic.trainable_weights, [], critic_loss)
        critic_train = K.function([self.critic.input, discounted_reward], [], updates=critic_updates)

        return [actor_train, critic_train]
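A hedged usage sketch for the two returned functions; the agent object and the batch arrays are placeholders, and the shapes follow the placeholders defined above:

actor_train, critic_train = agent._build_optimizer()
# states: (N, state_dim); actions, advantages, discounted rewards: (N, 1)
actor_train([state_batch, action_batch, advantage_batch])
critic_train([state_batch, discounted_reward_batch])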
Example #3
    def __init__(self, latent_dim, hidden_dim, exploration_probability, clip_value, value_decay, data,
                 batch_size, exploration_decay_rate):
        self.latent_dim = latent_dim
        self.words = data["words"]
        self.depth = 1 + max(len(w) for w in self.words)
        depth = self.depth
        self.hidden_dim = hidden_dim
        self.characters = data["characters"]
        self.charset = data["charset"]
        self.charmap = data["charmap"]
        self.wordcount = len(self.words)
        self.charcount = len(self.charset)
        self.generator = Generator("generator", latent_dim, depth, self.charcount, hidden_dim, exploration_probability,
                                   exploration_decay_rate)
        self.discriminator = Discriminator("discriminator", depth, self.charcount, hidden_dim)
        self.clip_value = np.float32(clip_value)
        self.value_decay = theano.shared(np.float32(value_decay), "value_decay")

        self.batch_size = batch_size
        self.word_vectors = np.vstack([self.word_to_vector(word).reshape((1, -1)) for word in self.words]).astype(
            np.int32)
        xreal = Input((depth,), name="xreal", dtype="int32")
        batch_n = T.iscalar("batch_n")
        srng = RandomStreams(seed=234)
        z = srng.normal(size=(batch_n, latent_dim))
        e = srng.uniform(size=(batch_n, depth), low=0, high=1)
        ex = srng.random_integers(size=(batch_n, latent_dim), low=0, high=self.charcount)
        # z = Input((latent_dim,), name="z", dtype="float32")
        # e = Input((depth,), name="e", dtype="float32")
        # ex = Input((depth,), name="ex", dtype="int32")
        # xreal = T.imatrix("xreal")
        # z = T.fmatrix("z")
        # e = T.fmatrix("e")
        # ex = T.imatrix("ex")
        _, xfake = self.generator.policy(z, e, ex)
        xfake = theano.gradient.zero_grad(xfake)
        # print("xfake: {}, {}".format(xfake, xfake.type))
        # print("xreal: {}, {}".format(xreal, xreal.type))
        _, yfake = self.discriminator.discriminator(xfake)
        _, yreal = self.discriminator.discriminator(xreal)
        dloss = T.mean(yfake, axis=None) - T.mean(yreal, axis=None)
        dconstraints = {p: ClipConstraint(self.clip_value) for p in self.discriminator.clip_params}
        dopt = Adam(1e-4)
        dupdates = dopt.get_updates(self.discriminator.params, dconstraints, dloss)

        n = z.shape[0]
        outputs_info = [T.zeros((n,), dtype='float32')]
        yfaker = T.transpose(yfake[:, ::-1], (1, 0))
        vtarget, _ = theano.scan(reward_function, outputs_info=outputs_info, sequences=yfaker,
                                 non_sequences=self.value_decay)
        vtarget = T.transpose(vtarget, (1, 0))[:, ::-1]
        # print("vtarget: {}, {}, {}".format(vtarget, vtarget.ndim, vtarget.type))
        _, vpred = self.generator.value(z, xfake)
        gloss = T.mean(T.abs_(vtarget - vpred), axis=None)
        gopt = Adam(1e-5)
        gupdates = gopt.get_updates(self.generator.params, {}, gloss)
        self.discriminator_train_function = theano.function([xreal, batch_n], [dloss], updates=dupdates)
        self.generator_train_function = theano.function([batch_n], [gloss], updates=gupdates)
        self.generator_sample_function = theano.function([batch_n], [xfake])
        self.test_function = theano.function([xreal, batch_n], [dloss, gloss])
Example #4
    def actor_optimizer(self):
        action = K.placeholder(shape=[None, self.action_size])
        log_old_pi = K.placeholder(shape=[
            None,
        ])
        advantages = K.placeholder(shape=[
            None,
        ])
        mu = self.actor.output
        std = 0.1
        log_pi = -0.5 * K.square(
            (action - mu) / std) - 0.5 * K.log(2 * np.pi) - K.log(std)
        ratio = K.exp(log_pi - log_old_pi)
        clipped_ratio = K.clip(ratio, 1 - self.clip, 1 + self.clip)
        returns = K.minimum(ratio * advantages, clipped_ratio * advantages)
        returns = -K.mean(returns)

        entropy = K.sum(K.exp(log_pi) * log_pi, axis=1)
        entropy = K.mean(entropy)

        loss = returns + self.entropy * entropy

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, log_old_pi, advantages],
                           [loss],
                           updates=updates)
        return train
Example #5
    def __init__(self, state_size, action_size, action_max):
        states_in = Input(shape=[state_size])
        h1 = Dense(units=H1_UNITS, activation='linear')(states_in)
        h1 = BatchNormalization()(h1)
        h1 = Activation('relu')(h1)

        h2 = Dense(units=H2_UNITS, activation='linear')(h1)
        h2 = BatchNormalization()(h2)
        h2 = Activation('relu')(h2)

        raw_actions = Dense(units=action_size, activation='tanh')(h2)
        actions = Lambda(lambda ra: ra * action_max)(raw_actions)

        self.model = Model(inputs=states_in, outputs=actions)

        # TODO: the policy-gradient update below is not fully understood yet
        action_gradients = Input(shape=[action_size])
        loss = K.mean(-action_gradients * actions)
        # Incorporate any additional losses here (e.g. from regularizers)
        optimizer = Adam(lr=ACTOR_LR)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
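In the standard DDPG setup, the action_gradients fed into train_fn above is the gradient of the critic's Q value with respect to the action. A hedged sketch of how it might be produced, assuming a separate critic model with inputs [states, actions]; none of these names come from the code above:

get_action_grads = K.function(
    inputs=[critic_model.input[0], critic_model.input[1], K.learning_phase()],
    outputs=K.gradients(critic_model.output, critic_model.input[1]))
action_grads = get_action_grads([state_batch, action_batch, 0])[0]   # dQ/da, shape (N, action_size)
actor.train_fn([state_batch, action_grads, 1])                        # 1 = training phase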
Example #6
    def optimizer(self):
        """
        grad Loss = - mean_t ( G_t * grad log pi(s_t, a_t) ), where the mean is taken over an entire episode
        """

        #Placeholders
        states_pl = self.model.input
        actions_onehot_pl = K.placeholder(name='actions',
                                          shape=(None, self.output_dim))
        return_pl = K.placeholder(shape=(None, ))

        #Loss
        pi_pl = self.model.output
        pi_vec = K.sum(actions_onehot_pl * pi_pl, axis=1)
        loss_vec = -K.log(pi_vec) * K.stop_gradient(return_pl)
        loss = K.mean(loss_vec)

        #Apply updates
        opt = Adam(self.lr)
        pars = self.model.trainable_weights
        updates = opt.get_updates(loss=loss, params=pars)

        return K.function(inputs=[states_pl, actions_onehot_pl, return_pl],
                          outputs=[],
                          updates=updates)
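A usage sketch assuming one finished episode and a small discount helper; everything outside optimizer() is hypothetical:

def discounted_returns(rewards, gamma=0.99):
    # G_t = r_t + gamma * G_{t+1}, accumulated backwards over the episode
    g, returns = 0.0, []
    for r in reversed(rewards):
        g = r + gamma * g
        returns.append(g)
    return np.asarray(returns[::-1])

train_fn = agent.optimizer()
train_fn([np.vstack(episode_states),            # (T, state_dim)
          np.vstack(episode_actions_onehot),    # (T, output_dim)
          discounted_returns(episode_rewards)])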
Example #7
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.action_size))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(((action - policy[:, :self.action_size]) /
                           (policy[:, self.action_size:] + 1e-9))**2 +
                          K.log(policy[:, self.action_size:] + 1e-9),
                          axis=1)
        #good_prob = -K.sum(-2 * ((action - policy[:, :self.action_size]) / (policy[:, self.action_size:] + 1e-9)) - K.log(policy[:, self.action_size:] + 1e-9), axis=1)

        #good_prob = K.sum(action * policy, axis=1)
        #eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        #eligibility = good_prob * K.stop_gradient(advantages)
        eligibility = good_prob * K.stop_gradient(advantages)
        #eligibility = -K.stop_gradient(advantages)
        loss = K.sum(eligibility)

        entropy = K.sum((policy**2) * K.log((policy**2) + 1e-10), axis=1)

        actor_loss = loss + 0.01 * entropy
        #actor_loss = K.sum((action - policy[:, :self.action_size]))# / (policy[:, self.action_size:] + 1e-9)) + K.sum(K.log(policy[:, self.action_size:] + 1e10))

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [],
                                        actor_loss)
        train = K.function([self.actor.input, action, advantages], [],
                           updates=updates)
        return train
Example #8
    def set_mc_optimizer_fcn(self, output_action_num):
        # Determine optimization function
        #   - Arg: input_state, action and reward
        #          -> Calculate cross entropy loss function

        # Set the action place holder
        action_pseudo = K.placeholder(shape=[None, output_action_num])
        value_normdis = K.placeholder(shape=[
            None,
        ])

        # Set the action probability - expectation of model output
        action_prob = K.sum(action_pseudo * self.policy_model.output, axis=1)

        # Set the cross entropy loss function
        cross_entropy = K.log(action_prob) * value_normdis
        loss = -K.sum(cross_entropy)

        # Declare train function with optimizer
        optimizer = Adam(lr=self.conf_lrn_rate)
        updates = optimizer.get_updates(self.policy_model.trainable_weights,
                                        [], loss)
        train_fcn = K.function(
            [self.policy_model.input, action_pseudo, value_normdis], [],
            updates=updates)
        return train_fcn
Example #9
    def optimizer(self):
        """
        grad L = - E_t[ Adv(t) * grad_theta log pi(s_t, a_t) ]

        where E_t denotes the average over an episode
        
        """

        #Placeholders
        state_pl = self.model.input
        action_onehot_pl = K.placeholder(name='action_onehot',
                                         shape=(None, self.output_dim))
        adv_pl = K.placeholder(name='advantage', shape=(None, ))

        #Set up loss
        pi_pl = self.model.output
        pi_vec = K.sum(action_onehot_pl * pi_pl, axis=1)
        loss_vec = -K.log(pi_vec) * K.stop_gradient(adv_pl)
        loss = K.mean(loss_vec)

        #Get updates
        opt = Adam(self.lr)
        pars = self.model.trainable_weights
        updates = opt.get_updates(loss=loss, params=pars)

        return K.function(inputs=[state_pl, action_onehot_pl, adv_pl],
                          outputs=[],
                          updates=updates)
Example #10
def compile_train_fn(model, learning_rate=1e-5):
    """ Build the CTC training routine for speech models.
    Args:
        model: A keras model (built=True) instance
    Returns:
        train_fn (theano.function): Function that takes in acoustic inputs,
            and updates the model. Returns network outputs and ctc cost
    """
    logger.info("Building train_fn")
    acoustic_input = model.inputs[0]
    network_output = model.outputs[0]
    label = K.placeholder(ndim=2, dtype='int32')
    label_lens = K.placeholder(ndim=2, dtype='int32')
    ctc_input_lengths = K.placeholder(ndim=2, dtype='int32')

    # ctc_cost = K.mean(K.ctc_batch_cost(label, network_output, ctc_input_lengths, label_lens))
    ctc_cost = K.ctc_batch_cost(label, network_output, ctc_input_lengths,
                                label_lens)

    trainable_vars = model.trainable_weights

    optimz = Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, decay=0.0, epsilon=10e-8)
    # optimz = SGD(lr=1e-03, clipnorm=100, decay=1e-6, momentum=0.9, nesterov=True)
    updates = list(optimz.get_updates(trainable_vars, [], ctc_cost))
    train_fn = K.function([
        acoustic_input, ctc_input_lengths, label, label_lens,
        K.learning_phase()
    ], [network_output, ctc_cost], updates)
    return train_fn
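A hedged call sketch; the argument order follows the K.function inputs above, and the shape conventions (labels of shape (batch, max_label_len), the two length arrays of shape (batch, 1)) are the usual K.ctc_batch_cost convention rather than anything stated in the snippet:

train_fn = compile_train_fn(model)
net_out, ctc_cost = train_fn([acoustic_batch,    # (batch, timesteps, features)
                              input_lengths,     # (batch, 1), valid output timesteps per utterance
                              label_batch,       # (batch, max_label_len), int32
                              label_lengths,     # (batch, 1)
                              True])             # learning phase: training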
Example #11
def training_function(training_model):
    def sim(v1, v2):
        return K.sum(v1 * v2, axis=-1)

    target_out = training_model.outputs[0]
    relevant_out = training_model.outputs[1]
    violate_out = training_model.outputs[2]
    margin = K.placeholder(shape=(None, ))

    loss = K.abs(margin + sim(target_out, violate_out) -
                 sim(target_out, relevant_out))
    # adam = Adadelta(lr=0.01)
    adam = Adam(lr=1e-4)
    updates = adam.get_updates(params=training_model.trainable_weights,
                               loss=loss)

    return K.function(
        inputs=[
            training_model.inputs[0],  # target_inputs: p_input
            training_model.inputs[1],  #                l_input
            training_model.inputs[2],  # relevant_input
            training_model.inputs[3],  # violate_input
            margin
        ],  # traj distance
        outputs=[loss],
        updates=updates)
Example #12
    def build_actor_optimizer(self):

        action = keras.backend.placeholder(shape=[None, self.action_size])

        advantages = keras.backend.placeholder(shape=[
            None,
        ])

        policy = self.actor.output

        action_prob = keras.backend.sum(action * policy, axis=1)

        cross_entropy = keras.backend.log(action_prob + 1e-6) * advantages

        cross_entropy = -keras.backend.mean(cross_entropy)

        entropy = keras.backend.sum(policy * keras.backend.log(policy + 1e-6),
                                    axis=1)

        entropy = keras.backend.mean(entropy)

        loss = cross_entropy + self.entropy * entropy

        optimizer = Adam(lr=self.actor_lr)

        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)

        train = keras.backend.function(
            [self.actor.input[0], self.actor.input[1], action, advantages],
            [loss],
            updates=updates)

        return train
Example #13
    def optimizer(self):
        """ 
           The gradient of the loss function L is
           
           \grad L = \grad_pars V (  V(s_t) - Q(s_t, a_t) + \alpha*log( \pi(s_t, a_t) )  )
          
        """

        #Find terms in bracket
        S_pl = self.model.input
        Pi_pl = K.placeholder(shape=(None, 1))
        Q_pl = K.placeholder(shape=(None, 1))
        V_pl = self.model.output
        temp = V_pl - Q_pl + self.alpha * K.log(Pi_pl)

        #Find gradient
        pars = self.model.trainable_weights
        grads = tf.gradients(V_pl, pars, -temp)  #scalar multiply by temp

        #Clip gradients
        if self.clipnorm == True:
            grads = tf.clip_by_global_norm(grads, self.clipnorm_val)[0]

        #Do learning
        #To get Keras to apply updates given custom gradients (i.e. to use the grads computed above) I had to
        #alter the source code. It was easy to do. See line X in the get_updates function.
        opt = Adam(self.lr)
        loss = grads  #placeholder, keras doesn't use it
        updates = opt.get_updates(loss=loss, params=pars, grads=grads)

        #This function will apply updates when called
        func = K.function(inputs=[S_pl, Q_pl, Pi_pl],
                          outputs=[],
                          updates=updates)
        return func
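Side note: the custom-gradient route above (passing grads into a patched get_updates) can often be replaced by a stop-gradient surrogate whose gradient is temp * dV/dtheta, matching the docstring's formula. A minimal sketch under that assumption, reusing temp, V_pl, opt and pars from the method above; sign conventions may need flipping to match the -temp used in the code:

# K.stop_gradient treats the bracketed term as a constant, so
# d(surrogate_loss)/d(theta) = mean( temp * dV/d(theta) )
surrogate_loss = K.mean(K.stop_gradient(temp) * V_pl)
updates = opt.get_updates(loss=surrogate_loss, params=pars)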
Example #14
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, 1))
        advantages = K.placeholder(shape=(None, 1))
        #self.model.outputs
        mu, sigma_sq = self.actor.output
        #mu, sigma_sq = self.actor.predict(state)
        #entropy of Gaussian
        entropy_loss = ENTROPY_BETA * (
            -K.mean(0.5 * (K.log(2. * np.pi * sigma_sq) + 1.)))

        #Prob Density Fn (PDF)
        #if sigma_sq is not None:
        #problem with clip, don't use TF tensor as bool error
        #sigma_sq = np.clip(sigma_sq,1e-3, None)
        p1 = -((action - mu)**2) / (2 * K.clip(sigma_sq, 1e-3, None)
                                    )  #clip min only
        p2 = -K.log(K.sqrt(2 * np.pi * sigma_sq))
        #log prob(a|s) given theta
        log_prob = p1 + p2
        #log_prob * score fn = advantage
        log_prob_v = advantages * log_prob
        loss_policy_v = -K.mean(log_prob_v)
        #sum losses
        loss_v = loss_policy_v + entropy_loss
        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [],
                                        loss_v)
        train = K.function([self.actor.input, action, advantages], [],
                           updates=updates)

        return train
Example #15
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.action_size))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(
            advantages)  # 1e-10 to 1e-8
        loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10),
                        axis=1)  # 1e-10 to 1e-8

        actor_loss = loss + 0.01 * entropy

        optimizer = Adam(lr=self.actor_lr)
        #updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        #train = K.function([self.actor.input, action, advantages], [], updates=updates)

        updates = optimizer.get_updates(params=self.actor.trainable_weights,
                                        loss=actor_loss)
        train = K.function([self.actor.input, action, advantages], [],
                           updates=updates)
        return train
Example #16
    def critic_optimizer(self):
        discounted_reward = K.placeholder(shape=(None, ),
                                          name="discounted_reward")
        value = self.critic.output
        critic_loss = K.square(discounted_reward - value)

        optimizer = Adam(lr=self.critic_lr,
                         beta_1=self.beta_1,
                         beta_2=self.beta_2,
                         epsilon=self.epsilon)
        with self.__global_score_list_lock:
            updates = optimizer.get_updates(self.critic.trainable_weights, [],
                                            critic_loss)

        weights_0 = self.actor.trainable_weights[0]
        weights_1 = self.actor.trainable_weights[1]
        weights_2 = self.actor.trainable_weights[2]
        weights_3 = self.actor.trainable_weights[3]

        train = K.function(
            [self.critic.input, discounted_reward],
            [critic_loss, value, weights_0, weights_1, weights_2, weights_3],
            updates=updates)

        global_logger.info("discounted_reward: {0}".format(discounted_reward))
        global_logger.info("critic ourput value: {0}".format(value))
        global_logger.info("self.critic.trainable_weights: {0}".format(
            self.critic.trainable_weights))

        return train
Example #17
    def compile_train_fn(self, learning_rate=2e-4):
        """ Build the CTC training routine for speech models.
        Args:
            learning_rate (float)
        Returns:
            train_fn (theano.function): Function that takes in acoustic inputs,
                and updates the model. Returns network outputs and ctc cost
        """
        logger.info("Building train_fn")
        f_inputs = [self.acoustic_input, self.ctc_in_lens]
        f_outputs = []
        f_updates = []
        for branch in self.branch_outputs:
            labels, label_lens = self.branch_labels[branch.name]
            f_inputs.append(labels)
            f_inputs.append(label_lens)

            if K.backend() == 'tensorflow':
                network_output = branch.output
                ctc_cost = K.mean(
                    K.ctc_batch_cost(labels, network_output, self.ctc_in_lens,
                                     label_lens))
            else:
                network_output = branch.output.dimshuffle((1, 0, 2))
                ctc_cost = ctc_th.gpu_ctc(network_output, labels,
                                          self.ctc_in_lens).mean()

            f_outputs.extend([network_output, ctc_cost])
            trainable_vars = self.branch_vars[branch.name]
            optmz = Adam(lr=learning_rate, clipnorm=100)
            f_updates.extend(optmz.get_updates(trainable_vars, [], ctc_cost))

        f_inputs.append(K.learning_phase())
        self.train_fn = K.function(f_inputs, f_outputs, f_updates)
        return self.train_fn
Example #18
    def build_actor(self, state_size, action_size):
        h1_size = 64
        h2_size = 32
        h3_size = 16

        states = Input(shape=[state_size], name='states')
        h1 = Dense(h1_size, activation='relu', name='hidden1')(states)
        h2 = Dense(h2_size, activation='relu', name='hidden2')(h1)
        h3 = Dense(h3_size, activation='relu', name='hidden3')(h2)
        # relu to make the min zero, step function in task
        # has safety to reduce high inputs to max speed
        actions_0_1 = Dense(action_size,
                            activation='sigmoid',
                            name='actions_0_1')(h3)

        actions = Lambda(lambda x: (x * self.action_range) + self.action_low,
                         name='output_actions')(actions_0_1)

        self.model = Model(inputs=states, outputs=actions)

        action_gradients = Input(shape=([self.action_size]),
                                 name='action_grads')
        loss = K.mean(-action_gradients * actions)
        optimizer = Adam()
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Example #19
    def model_optimizer(self):
        target = K.placeholder(shape=[None, self.action_size])
        weight = K.placeholder(shape=[
            None,
        ])

        # Code for the Huber loss.

        clip_delta = 1.0

        pred = self.model.output

        err = target - pred

        cond = K.abs(err) < clip_delta

        squared_loss = 0.5 * K.square(err)
        linear_loss = clip_delta * (K.abs(err) - 0.5 * clip_delta)

        loss1 = tf.where(cond, squared_loss, linear_loss)

        # Define the PER loss: the Huber loss multiplied by the importance-sampling weight.
        weighted_loss = tf.multiply(tf.expand_dims(weight, -1), loss1)

        loss = K.mean(weighted_loss, axis=-1)

        optimizer = Adam(lr=self.learning_rate)
        updates = optimizer.get_updates(self.model.trainable_weights, [], loss)

        train = K.function([self.model.input, target, weight], [err],
                           updates=updates)

        return train
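For context, the weight placeholder above usually carries the prioritized-replay importance-sampling correction w_i = (N * P(i))^(-beta), normalised by its maximum. A hedged sketch; the replay-buffer variables and the agent object are placeholders:

update_fn = agent.model_optimizer()                             # the K.function returned above
probs = priorities ** alpha / np.sum(priorities ** alpha)       # sampling distribution P(i)
is_weights = (len(replay_buffer) * probs[sampled_idx]) ** (-beta)
is_weights /= is_weights.max()                                  # scale so the largest weight is 1
td_errors = update_fn([state_batch, target_batch, is_weights])[0]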
Example #20
    def build_actor_optimizer(self):

        action = keras.backend.placeholder(shape=[None, self.action_size])

        advantages = keras.backend.placeholder(shape=[
            None,
        ])

        policy = self.actor.output

        action_prob = keras.backend.sum(action * policy, axis=1)

        log_prob_actions_v = keras.backend.log(action_prob + 1e-6) * advantages

        loss_p = -keras.backend.sum(log_prob_actions_v)

        loss = keras.backend.mean(loss_p)

        optimizer = Adam(lr=self.actor_lr)

        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)

        train = keras.backend.function(
            [self.actor.input[0], self.actor.input[1], action, advantages],
            [loss],
            updates=updates)

        return train
Example #21
    def __build_train_fn(self):
        action_prob_placeholder = self.speaker_model.output
        action_onehot_placeholder = K.placeholder(shape=(None,
                                                         self.alphabet_size),
                                                  name="action_onehot")
        reward_placeholder = K.placeholder(shape=(None, ), name="reward")
        action_prob = K.sum(action_prob_placeholder *
                            action_onehot_placeholder,
                            axis=1)
        log_action_prob = K.log(action_prob)
        loss = -log_action_prob * reward_placeholder

        ## Add entropy to the loss
        entropy = K.sum(action_prob_placeholder *
                        K.log(action_prob_placeholder + 1e-10),
                        axis=1)
        entropy = K.sum(entropy)

        loss = loss + 0.1 * entropy
        loss = K.mean(loss)
        adam = Adam()
        updates = adam.get_updates(params=self.speaker_model.trainable_weights,
                                   loss=loss)
        self.train_fn = K.function(inputs=[
            self.speaker_model.input, action_onehot_placeholder,
            reward_placeholder
        ],
                                   outputs=[loss, entropy],
                                   updates=updates)
Example #22
 def critic_optimizer(self):
     target = K.placeholder(shape=[None, ])
     loss = K.mean(K.square(target - self.critic.output))
     optimizer = Adam(lr=self.critic_lr)
     updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
     train = K.function([self.critic.input, target], [], updates=updates)
     return train
Example #23
 def __build_train_fn(self):
     # def loss(discount_r):
     #     def f(y_true, y_pred):
     #         action_prob = K.sum(y_true*y_pred, axis=1)
     #         action_prob = K.log(action_prob)
     #         policy_loss = -K.sum(discount_r) * K.mean(action_prob)
     #         policy_loss = K.print_tensor(policy_loss)
     #         return policy_loss
     #     return f
     # discount_reward_ = Input(shape=(1,))
     # state = Input(shape=(6400,))
     # pi_action = self.model(state)
     # model = Model([state, discount_reward_], pi_action)
     # adam = Adam(lr=1e-4)
     # rmsprop = RMSprop(lr=1e-4 ,clipnorm=1) #10
     # model.compile(optimizer=rmsprop, loss=loss(discount_reward_))
     action_prob_placeholder = self.model.output
     action_onehot_placeholder = K.placeholder(shape=(None, 2))
     discount_reward_placeholder = K.placeholder(shape=(None,))
     action_prob = K.sum(action_prob_placeholder * action_onehot_placeholder, axis=1)
     log_action_prob = K.log(action_prob)
     loss = - log_action_prob * discount_reward_placeholder
     loss = K.sum(loss)
     adam = Adam(lr=1e-4)#,decay = 0.99)
     rmsprop = RMSprop(lr=1e-4, decay=0.99)
     updates = adam.get_updates(params=self.model.trainable_weights,
                                loss=loss)
     self.train_fn = K.function(inputs=[self.model.input,
                                        action_onehot_placeholder,
                                        discount_reward_placeholder],
                                outputs=[loss],
                                updates=updates)
Example #24
    def build_critic_optimizer(self):

        y = keras.backend.placeholder(shape=(None, 1))

        value = self.critic.output

        # Huber loss

        error = tf.abs(y - value)

        quadratic = keras.backend.clip(error, 0.0, 1.0)

        linear = error - quadratic

        loss = keras.backend.mean(0.5 * keras.backend.square(quadratic) +
                                  linear)

        optimizer = Adam(lr=self.critic_lr)

        updates = optimizer.get_updates(self.critic.trainable_weights, [],
                                        loss)

        train = keras.backend.function(
            [self.critic.input[0], self.critic.input[1], y], [loss],
            updates=updates)

        return train
Example #25
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, 1))
        advantages = K.placeholder(shape=(None, 1))

        # mu = K.placeholder(shape=(None, self.action_size))
        # sigma_sq = K.placeholder(shape=(None, self.action_size))

        mu, sigma_sq = self.actor.output

        pdf = 1. / K.sqrt(2. * np.pi * sigma_sq) * K.exp(
            -K.square(action - mu) / (2. * sigma_sq))
        log_pdf = K.log(pdf + K.epsilon())
        entropy = K.sum(0.5 * (K.log(2. * np.pi * sigma_sq) + 1.))

        exp_v = log_pdf * advantages

        exp_v = K.sum(exp_v + 0.01 * entropy)
        actor_loss = -exp_v

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [],
                                        actor_loss)

        train = K.function([self.actor.input, action, advantages], [],
                           updates=updates)
        return train
Example #26
    def build_model(self, target=False):
        states = Input(shape=(self.state_size, ), name='states')
        net = Dense(units=40, activation='relu')(states)
        net = Dense(units=20, activation='relu')(net)
        actions = Dense(units=self.action_size,
                        activation='tanh',
                        name='actions')(net)

        if target:
            self.target = Model(inputs=states, outputs=actions)
            return

        self.model = Model(inputs=states, outputs=actions)

        action_gradients = Input(shape=(self.action_size, ))
        loss = K.mean(-action_gradients * actions)

        optimizer = Adam(lr=0.0001)
        updates_op = optimizer.get_updates(params=self.model.trainable_weights,
                                           loss=loss)
        self.train_fn = K.function(
            inputs=[self.model.input, action_gradients,
                    K.learning_phase()],
            outputs=[],
            updates=updates_op)
Example #27
def K_function_train():
    # Training can be done directly with K.function.
    import numpy as np
    from keras import backend as K
    from keras.layers.core import Dense
    from keras.models import Sequential
    from keras.optimizers import Adam
    y = K.placeholder(shape=[None, 1])
    model = Sequential()
    model.add(Dense(24, input_dim=3, activation='relu'))
    model.add(Dense(1))
    loss = K.mean(K.square(model.output - y))

    optimizer = Adam(lr=0.001)
    updates = optimizer.get_updates(model.trainable_weights, [], loss)

    train = K.function([model.input, y], [model.output, loss], updates=updates)

    # run a few training steps
    data = np.random.randn(2, 3)
    target = np.random.randn(2, 1)
    output = model.predict(data)

    for i in range(1000):
        temp = train([data, target])
        if i % 100 == 0:
            print(i, temp)

    # check the results
    print('data', data)
    print('target', target)
    print('predict', model.predict(data))
Example #28
    def actor_optimizer(self):
        #placeholders for actions and advantages parameters coming in
        action = K.placeholder(shape=(None, 1))
        advantages = K.placeholder(shape=(None, 1))

        # mu = K.placeholder(shape=(None, self.action_size))
        # sigma_sq = K.placeholder(shape=(None, self.action_size))

        mu, sigma_sq = self.actor.output

        #defined a custom loss using PDF formula, K.exp is element-wise exponential
        pdf = 1. / K.sqrt(2. * np.pi * sigma_sq) * K.exp(-K.square(action - mu) / (2. * sigma_sq))
        #log pdf why?
        log_pdf = K.log(pdf + K.epsilon())
        #entropy looks different from log(sqrt(2 * pi * e * sigma_sq))
        #Sum of the values in a tensor, alongside the specified axis.
        entropy = K.sum(0.5 * (K.log(2. * np.pi * sigma_sq) + 1.))

        exp_v = log_pdf * advantages
        #entropy is made small before added to exp_v
        exp_v = K.sum(exp_v + 0.01 * entropy)
        #loss is a negation
        actor_loss = -exp_v

        #use custom loss to perform updates with Adam, ie. get gradients
        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        #adjust params with custom train function
        train = K.function([self.actor.input, action, advantages], [], updates=updates)
        #return custom train function
        return train
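On the comment above questioning the entropy term: 0.5 * (log(2 * pi * sigma_sq) + 1) is the same quantity as log(sqrt(2 * pi * e * sigma_sq)), since log(sqrt(2 * pi * e * sigma_sq)) = 0.5 * log(2 * pi * sigma_sq) + 0.5 * log(e) = 0.5 * (log(2 * pi * sigma_sq) + 1), so the two forms of the Gaussian differential entropy agree.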
Example #29
    def __init__(self, render=False):
        self.render = True

        self.state_dim = env.observation_space.n
        self.action_count = env.action_space.n
        self.update_frequency = 5
        self.discount_factor = 0.9
        self.running_variance = RunningVariance()

        print('state_dim: {}, action_count: {}, update_frequency: {} '.format(
        self.state_dim, self.action_count, self.update_frequency))

        #actor network
        actor = Sequential()
        actor.add(Dense(16, input_shape=(self.state_dim,), activation='relu', kernel_initializer='he_uniform'))
        actor.add(Dense(self.action_count, activation='softmax', kernel_initializer='he_uniform'))
        actor.summary()
        self.actor = actor

        #actor_optimizer
        action = K.placeholder(shape=[None, self.action_count])
        advantage = K.placeholder(shape=[None, ])

        action_prob = K.sum(action * self.actor.output, axis=1)
        cross_entropy = K.log(action_prob) * advantage
        loss = -K.sum(cross_entropy)

        optimizer = Adam(lr=0.005)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], loss)
        train = K.function([self.actor.input, action, advantage], [], updates=updates)
        self.actor_optimizer = train

        #critic network
        critic = Sequential()
        critic.add(Dense(16, input_shape=(self.state_dim,), activation='relu', kernel_initializer='he_uniform'))
        critic.add(Dense(1, activation='linear', kernel_initializer='he_uniform'))
        critic.summary()
        self.critic = critic

        #critic optimizer
        target = K.placeholder(shape=[None, ])
        loss = K.mean(K.square(target - self.critic.output))

        optimizer = Adam(lr=0.001)
        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, target], [], updates=updates)
        self.critic_optimizer = train
Example #30
def scores_from_adgan_generator(x_test,
                                prior_gen,
                                generator,
                                n_seeds=8,
                                k=5,
                                z_lr=0.25,
                                gen_lr=5e-5):
    generator.trainable = True
    initial_weights = generator.get_weights()

    gen_opt = Adam(lr=gen_lr, beta_1=0.5)
    z_opt = Adam(lr=z_lr, beta_1=0.5)

    x_ph = K.placeholder((1, ) + x_test.shape[1:])
    z = K.variable(prior_gen(1))
    rec_loss = K.mean(K.square(x_ph - generator(z)))
    z_train_fn = K.function([x_ph], [rec_loss],
                            updates=z_opt.get_updates(rec_loss, [z]))
    g_train_fn = K.function([x_ph, K.learning_phase()], [rec_loss],
                            updates=gen_opt.get_updates(
                                rec_loss, generator.trainable_weights))

    gen_opt_initial_params = gen_opt.get_weights()
    z_opt_initial_params = z_opt.get_weights()

    scores = []
    for x in x_test:
        x = np.expand_dims(x, axis=0)
        losses = []
        for j in range(n_seeds):
            K.set_value(z, prior_gen(1))
            generator.set_weights(initial_weights)
            gen_opt.set_weights(gen_opt_initial_params)
            z_opt.set_weights(z_opt_initial_params)
            for _ in range(k):
                z_train_fn([x])
                g_train_fn([x, 1])
            loss = z_train_fn([x])[0]
            losses.append(loss)

        score = -np.mean(losses)
        scores.append(score)

    return np.array(scores)
Example #31
    def critic_optimizer(self):
        discounted_reward = K.placeholder(shape=(None, ))

        value = self.critic.output

        loss = K.mean(K.square(discounted_reward - value))

        optimizer = Adam(lr=self.critic_lr)
        updates = optimizer.get_updates(self.critic.trainable_weights, [], loss)
        train = K.function([self.critic.input, discounted_reward], [], updates=updates)
        return train
Example #32
    def optimizer(self):
        action = K.placeholder(shape=[None, 5])
        discounted_rewards = K.placeholder(shape=[None, ])

        # Calculate cross entropy error function
        action_prob = K.sum(action * self.model.output, axis=1)
        cross_entropy = K.log(action_prob) * discounted_rewards
        loss = -K.sum(cross_entropy)

        # create training function
        optimizer = Adam(lr=self.learning_rate)
        updates = optimizer.get_updates(self.model.trainable_weights, [],
                                        loss)
        train = K.function([self.model.input, action, discounted_rewards], [],
                           updates=updates)

        return train
Example #33
    def actor_optimizer(self):
        action = K.placeholder(shape=(None, self.action_size))
        advantages = K.placeholder(shape=(None, ))

        policy = self.actor.output

        good_prob = K.sum(action * policy, axis=1)
        eligibility = K.log(good_prob + 1e-10) * K.stop_gradient(advantages)
        loss = -K.sum(eligibility)

        entropy = K.sum(policy * K.log(policy + 1e-10), axis=1)

        actor_loss = loss + 0.01*entropy

        optimizer = Adam(lr=self.actor_lr)
        updates = optimizer.get_updates(self.actor.trainable_weights, [], actor_loss)
        train = K.function([self.actor.input, action, advantages], [], updates=updates)
        return train
Example #34
    regularizers += _regularizers
    constraints += _consts
    updates += _updates

print("parameters:")
print(params)
print("regularizers:")
print(regularizers)
print("constrains:")
print(constraints)
print("updates:")
print(updates)

"""updates"""
optimizer = Adam()
_updates = optimizer.get_updates(params, constraints, train_loss)
updates += _updates

print("after Adam, updates:")
for update in updates:
    print(update)

train_ins = [X_train, y, weights]
test_ins = [X_test, y, weights]
predict_ins = [X_test]

"""Get functions"""
print("complie: _train")
_train = K.function(train_ins, [train_loss], updates=updates)
print("complie: _train_with_acc")
_train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates)