Example #1
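The snippets on this page are excerpts and none of them show their imports or module-level constants. A minimal setup they appear to assume (TensorFlow 1.x; the constant values below are illustrative guesses, not taken from the source):

# Assumed common setup for the examples on this page (TensorFlow 1.x era,
# where contrib shipped MultivariateNormalFullCovariance).
import math
import threading
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.contrib.distributions import MultivariateNormalFullCovariance

N_POP = 20            # population size per generation (assumed value)
POP_SIZE = 20         # same role, name used by some snippets (assumed value)
DNA_SIZE = 2          # solution dimensionality (assumed value)
Factor = 2            # elite-fraction divisor (assumed value)
LR = 0.02             # learning rate (assumed value)
LEARNING_RATE = 0.02  # learning rate name used by Example #2 (assumed value)
N_GENERATION = 100    # number of generations (assumed value)
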
class Worker_pop(object):
    def __init__(self, name, pops, pop, sub, sub_size, global_pop, Factor=2):
        """
        这个类用来定义种群网络
        :param name: 用来表示所在种群的名称
        :param pops: 用来表示所有的线程
        :param pop: 用来表示所在的种群
        :param sub: 用来表示所在的子集
        :param sub_size: 用来表示所在种群的大小
        :param global_pop: 用来存储对应的全局网络
        :param Factor: 用来表示所选取的offset用于更新网络的个数因子
        """
        with tf.variable_scope(name):
            self.name = name
            self.pops = pops
            self.pop = pop
            self.sub = sub
            self.sub_size = sub_size
            self.N_POP_size = N_POP
            self.C_POP_size = math.floor(N_POP / Factor)
            with tf.variable_scope('mean'):
                self.mean = tf.Variable(tf.truncated_normal([self.sub_size, ], mean=0.0, stddev=0.01), dtype=tf.float32,
                                        name=name + '_mean')
            with tf.variable_scope('cov'):
                self.cov = tf.Variable(1.0 * tf.eye(self.sub_size), dtype=tf.float32, name=name + '_cov')
            # elementwise abs keeps cov entries non-negative (note: this does not guarantee positive-definiteness)
            self.mvn = MultivariateNormalFullCovariance(loc=self.mean, covariance_matrix=abs(self.cov))
            self.make_kid = self.mvn.sample(self.N_POP_size)
            self.tfkids_fit = tf.placeholder(tf.float32, [self.C_POP_size, ])
            self.tfkids = tf.placeholder(tf.float32, [self.C_POP_size, self.sub_size])
            self.loss = -tf.reduce_mean(self.mvn.log_prob(self.tfkids) * self.tfkids_fit)
            self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
            self.mean_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/mean')
            self.cov_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/cov')

            with tf.name_scope('pull'):
                self.pull_mean_op = self.mean.assign(global_pop.mean)
                self.pull_cov_op = self.cov.assign(global_pop.cov)
            with tf.name_scope('push'):
                self.push_mean_op = global_pop.mean.assign(self.mean)
                self.push_cov_op = global_pop.cov.assign(self.cov)
            with tf.name_scope('restart'):
                self.re_mean_op = self.mean.assign(
                    tf.truncated_normal([self.sub_size, ], mean=0.0, stddev=0.01))  # fresh random draw on each restart
                self.re_cov_op = self.cov.assign(1.0 * tf.eye(self.sub_size))

    def _update_net(self):
        sess.run([self.push_mean_op, self.push_cov_op])

    def _pull_net(self):
        sess.run([self.pull_mean_op, self.pull_cov_op])

    def _restart_net(self):
        sess.run([self.re_mean_op, self.re_cov_op])
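A hedged sketch of how a worker like this might be driven (the excerpt does not show the driver; `sess`, `global_pop`, the constructor arguments, and `get_fitness` are all assumptions here):

# Hypothetical driver loop for one worker.
worker = Worker_pop('W0', pops, pop, sub, sub_size, global_pop)  # args assumed
sess.run(tf.global_variables_initializer())
for g in range(N_GENERATION):
    worker._pull_net()                    # copy the global mean/cov locally
    kids = sess.run(worker.make_kid)      # sample N_POP candidate solutions
    fits = get_fitness(kids)              # assumed fitness function, shape [N_POP]
    elite = np.argsort(fits)[-worker.C_POP_size:]   # keep the best floor(N_POP / Factor)
    sess.run(worker.train_op, {worker.tfkids: kids[elite],
                               worker.tfkids_fit: fits[elite]})
    worker._update_net()                  # push the updated mean/cov to the global net

The placeholder shapes (C_POP_size rows against N_POP samples) are what suggest the elite-selection step: only the best floor(N_POP / Factor) offspring feed the update.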
Example #2
class Worker_pop(object):
    def __init__(self, name, data):
        with tf.variable_scope(name):
            self.name = name
            self.DNA_size = data.getDNA_size()
            # self.mean_params, self.cov_params, self.mean, self.cov = self._creat_net(name, data.getDNA_size())
            with tf.variable_scope('mean'):
                self.mean = tf.Variable(tf.truncated_normal([self.DNA_size, ], stddev=0.1, mean=0.5), dtype=tf.float32,
                                        name=name + '_mean')
            with tf.variable_scope('cov'):
                self.cov = tf.Variable(1.0 * tf.eye(self.DNA_size), dtype=tf.float32, name=name + '_cov')
            self.mvn = MultivariateNormalFullCovariance(loc=self.mean, covariance_matrix=abs(self.cov))
            self.make_kid = self.mvn.sample(N_POP)
            self.tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
            self.tfkids = tf.placeholder(tf.float32, [N_POP, self.DNA_size])
            # self.loss = -tf.reduce_mean(
            #     self.mvn.log_prob(self.tfkids) * self.tfkids_fit + 0.01 * self.mvn.log_prob(
            #         self.tfkids) * self.mvn.prob(
            #         self.tfkids))
            self.loss = -tf.reduce_mean(self.mvn.log_prob(self.tfkids) * self.tfkids_fit)
            self.train_op = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
                self.loss)  # compute and apply gradients for mean and cov
            self.mean_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/mean')
            self.cov_params = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/cov')

    def _update_net(self):
        lock_push.acquire()
        self.push_mean_params_op = [g_p.assign(l_p) for g_p, l_p in zip(global_pop.mean_params, self.mean_params)]
        self.push_cov_params_op = [g_p.assign(l_p) for g_p, l_p in zip(global_pop.cov_params, self.cov_params)]
        sess.run([self.push_mean_params_op, self.push_cov_params_op])
        # self.update_mean = self.train_op.apply_gradients(zip(self.mean_grads, global_pop.mean_params))
        # self.update_cov = self.train_op.apply_gradients(zip(self.cov_grads, global_pop.cov_params))
        # sess.run([self.update_mean, self.update_cov])  # local grads applies to global net
        lock_push.release()

    def _pull_net(self):
        lock_pull.acquire()
        self.pull_mean_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.mean_params, global_pop.mean_params)]
        self.pull_cov_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.cov_params, global_pop.cov_params)]
        sess.run([self.pull_mean_params_op, self.pull_cov_params_op])
        lock_pull.release()
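The push/pull methods imply a shared `global_pop` updated from several worker threads, with `lock_push`/`lock_pull` serializing the copies. A hedged sketch of that setup (nothing below appears in the excerpt):

# Hypothetical multi-threaded setup around Worker_pop.
lock_push = threading.Lock()
lock_pull = threading.Lock()
global_pop = Worker_pop('global_pop', data)
workers = [Worker_pop('W%i' % i, data) for i in range(4)]
sess = tf.Session()
sess.run(tf.global_variables_initializer())

def work(w):
    for g in range(N_GENERATION):
        w._pull_net()                 # copy global mean/cov into this worker
        kids = sess.run(w.make_kid)
        fits = get_fitness(kids)      # assumed fitness function
        sess.run(w.train_op, {w.tfkids: kids, w.tfkids_fit: fits})
        w._update_net()               # copy this worker's mean/cov back

threads = [threading.Thread(target=work, args=(w,)) for w in workers]
for t in threads:
    t.start()
for t in threads:
    t.join()

One design note: `_update_net` and `_pull_net` here build fresh assign ops on every call, which keeps adding nodes to the graph; Example #5 below avoids this by building the push/pull ops once in `__init__`.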
Example #3

# build multivariate distribution
mean = tf.Variable(tf.random_normal([2, ], 13., 1.), dtype=tf.float32)
cov = tf.Variable(5. * tf.eye(DNA_SIZE), dtype=tf.float32)
mvn = MultivariateNormalFullCovariance(loc=mean, covariance_matrix=cov)
make_kid = mvn.sample(N_POP)  # sampling operation

# compute gradient and update mean and covariance matrix from sample and fitness
tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
loss = -tf.reduce_mean(mvn.log_prob(tfkids) * tfkids_fit)  # log prob * fitness
train_op = tf.train.GradientDescentOptimizer(LR).minimize(
    loss)  # compute and apply gradients for mean and cov

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize tf variables

# something about plotting (can be ignored)
n = 300
x = np.linspace(-20, 20, n)
X, Y = np.meshgrid(x, x)
Z = np.zeros_like(X)
for i in range(n):
    for j in range(n):
        Z[i, j] = get_fitness(np.array([[x[i], x[j]]]))
plt.contourf(X, Y, -Z, 100, cmap=plt.cm.rainbow)
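The loss built above implements the score-function (REINFORCE) gradient estimator: for the sampling distribution p_theta with theta = (mean, cov),

\nabla_\theta \, \mathbb{E}_{x \sim p_\theta}[f(x)]
    = \mathbb{E}_{x \sim p_\theta}\big[f(x)\,\nabla_\theta \log p_\theta(x)\big]
    \approx \frac{1}{N}\sum_{i=1}^{N} f(x_i)\,\nabla_\theta \log p_\theta(x_i)

so running GradientDescentOptimizer on loss = -tf.reduce_mean(mvn.log_prob(tfkids) * tfkids_fit) performs stochastic gradient ascent on the expected fitness, pulling mean and cov toward regions where the sampled kids scored well.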
Example #4
def learn(
    base_env,
    policy_fn,
    *,
    max_fitness,  # must be negative, since CMA-ES performs minimization
    popsize,
    gensize,
    bounds,
    sigma,
    eval_iters,
    timesteps_per_actorbatch,
    max_timesteps=0,
    max_episodes=0,
    max_iters=0,
    max_seconds=0,
    seed=0,
    optim_stepsize=3e-4,
    schedule='constant'  # annealing for stepsize parameters (epsilon and adam)
):
    set_global_seeds(seed)
    # Setup losses and stuff
    # ----------------------------------------
    ob_space = base_env.observation_space
    ac_space = base_env.action_space
    pi = policy_fn("pi", ob_space,
                   ac_space)  # Construct network for new policy
    backup_pi = policy_fn(
        "backup_pi", ob_space, ac_space
    )  # backup network used to restore the policy between individuals during the ES evolution

    sol_dim = int(
        np.sum([
            np.prod(v.get_shape().as_list())
            for v in pi.get_trainable_variables()
        ]))
    pop_size = tf.placeholder(dtype=tf.float32, shape=[])
    lrmult = tf.placeholder(
        name='lrmult', dtype=tf.float32,
        shape=[])  # learning rate multiplier, updated with schedule

    tfkids_fit = tf.placeholder(dtype=tf.float32, shape=[popsize, ])
    tfkids = tf.placeholder(dtype=tf.float32, shape=[popsize, sol_dim])

    tfmean = tf.Variable(initial_value=tf.random_normal([sol_dim, ], 0., 1.),
                         dtype=tf.float32)
    tfcov = tf.Variable(initial_value=tf.eye(sol_dim), dtype=tf.float32)
    mvn = MultivariateNormalFullCovariance(loc=tfmean, covariance_matrix=tfcov)

    loss = -tf.reduce_mean(mvn.log_prob(tfkids) * tfkids_fit)
    train_op = tf.train.GradientDescentOptimizer(lrmult).minimize(loss)

    optimize = U.function([tfkids, tfkids_fit, lrmult], [train_op])
    reproduce = U.function([pop_size], [mvn.sample(popsize)])
    get_mean = U.function([], [tfmean])

    input_mean = tf.placeholder(dtype=tf.float32, shape=[sol_dim, ])
    assign_weights_to_mean = U.function([input_mean],
                                        [tf.assign(tfmean, input_mean)])

    U.initialize()

    pi_set_from_flat_params = U.SetFromFlat(pi.get_trainable_variables())
    pi_get_flat_params = U.GetFlat(pi.get_trainable_variables())

    global timesteps_so_far, episodes_so_far, iters_so_far, \
        tstart, lenbuffer, rewbuffer, best_fitness, eval_seq
    episodes_so_far = 0
    timesteps_so_far = 0
    iters_so_far = 0
    tstart = time.time()
    lenbuffer = deque(maxlen=100)  # rolling buffer for episode lengths
    rewbuffer = deque(maxlen=100)  # rolling buffer for episode rewards

    assign_backup_eq_new = U.function(
        [], [],
        updates=[
            tf.assign(backup_v, newv) for (
                backup_v,
                newv) in zipsame(backup_pi.get_variables(), pi.get_variables())
        ])
    assign_new_eq_backup = U.function(
        [], [],
        updates=[
            tf.assign(newv, backup_v)
            for (newv, backup_v
                 ) in zipsame(pi.get_variables(), backup_pi.get_variables())
        ])

    assert sum(
        [max_iters > 0, max_timesteps > 0, max_episodes > 0,
         max_seconds > 0]) == 1, "Only one time constraint permitted"

    # Build generator for all solutions
    actors = []
    best_fitness = -np.inf

    eval_seq = traj_segment_generator_eval(pi,
                                           base_env,
                                           timesteps_per_actorbatch,
                                           stochastic=True)
    for i in range(popsize):
        newActor = traj_segment_generator(pi,
                                          base_env,
                                          timesteps_per_actorbatch,
                                          stochastic=True,
                                          eval_iters=eval_iters)
        actors.append(newActor)
    while True:
        if max_timesteps and timesteps_so_far >= max_timesteps:
            logger.log("Max time steps")
            break
        elif max_episodes and episodes_so_far >= max_episodes:
            logger.log("Max episodes")
            break
        elif max_iters and iters_so_far >= max_iters:
            logger.log("Max iterations")
            break
        elif max_seconds and time.time() - tstart >= max_seconds:
            logger.log("Max time")
            break
        assign_backup_eq_new()  # backup current policy

        if schedule == 'constant':
            cur_lrmult = 1.0
        elif schedule == 'linear':
            cur_lrmult = max(
                1.0 - float(timesteps_so_far) / (max_timesteps / 2), 0)
        else:
            raise NotImplementedError

        logger.log("********** Generation %i ************" % iters_so_far)
        eval_seg = eval_seq.__next__()
        rewbuffer.extend(eval_seg["ep_rets"])
        lenbuffer.extend(eval_seg["ep_lens"])
        if iters_so_far == 0:
            result_record()
            assign_weights_to_mean(pi_get_flat_params())
        # mean = pi_get_flat_params()
        solutions = reproduce(popsize)
        ob_segs = None
        segs = []
        costs = []
        lens = []
        for idx, solution in enumerate(solutions[0]):  # idx avoids shadowing the builtin id
            # pi.set_Flat_variables(solution)
            pi_set_from_flat_params(solution)
            seg = actors[idx].__next__()
            costs.append(-np.mean(seg["ep_rets"]))
            lens.append(np.sum(seg["ep_lens"]))
            segs.append(seg)
            if ob_segs is None:
                ob_segs = {'ob': np.copy(seg['ob'])}
            else:
                ob_segs['ob'] = np.append(ob_segs['ob'], seg['ob'], axis=0)
            assign_new_eq_backup()
        optimize(solutions[0], np.array(costs), cur_lrmult * optim_stepsize)
        # fit_idx = np.array(costs).flatten().argsort()[:len(costs)]
        # solutions = np.array(solutions)[fit_idx]
        # costs = np.array(costs)[fit_idx]
        # segs = np.array(segs)[fit_idx]
        # # Weights decay
        # # costs, real_costs = fitness_shift(costs)
        # # costs, real_costs = compute_centered_ranks(costs)
        # l2_decay = compute_weight_decay(0.01, solutions)
        # costs += l2_decay
        # costs, real_costs = fitness_normalization(costs)
        # # best_solution = np.copy(solutions[0])
        # # best_fitness = -real_costs[0]
        # # rewbuffer.extend(segs[0]["ep_rets"])
        # # lenbuffer.extend(segs[0]["ep_lens"])
        # es.tell_real_seg(solutions = solutions, function_values = costs, real_f = real_costs, segs = segs)
        # best_solution = np.copy(es.result[0])
        # best_fitness = -es.result[1]
        # rewbuffer.extend(es.result[3]["ep_rets"])
        # lenbuffer.extend(es.result[3]["ep_lens"])
        # logger.log("Generation:", es.countiter)
        # logger.log("Best Solution Fitness:", best_fitness)
        pi_set_from_flat_params(get_mean()[0])

        ob = ob_segs["ob"]
        if hasattr(pi, "ob_rms"):
            pi.ob_rms.update(
                ob)  # update running mean/std for observation normalization

        iters_so_far += 1
        episodes_so_far += sum(lens)
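A hedged example of invoking learn (the excerpt never shows how policy_fn is built; the MlpPolicy import and every argument value below are assumptions in the style of OpenAI Baselines, which the helpers U, logger, zipsame, and traj_segment_generator suggest):

# Hypothetical invocation of learn() with a Baselines-style policy.
import gym
from baselines.ppo1.mlp_policy import MlpPolicy

def policy_fn(name, ob_space, ac_space):
    return MlpPolicy(name=name, ob_space=ob_space, ac_space=ac_space,
                     hid_size=64, num_hid_layers=2)

env = gym.make('Hopper-v2')        # assumed environment
learn(env, policy_fn,
      max_fitness=-1e6,            # must be negative: fitness is minimized
      popsize=16,
      gensize=100,                 # assumed; unused in the excerpt shown
      bounds=None, sigma=0.3,      # assumed; unused in the excerpt shown
      eval_iters=1,
      timesteps_per_actorbatch=2048,
      max_timesteps=1_000_000)     # exactly one time constraint may be set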
Example #5
class Worker_pop(object):
    def __init__(self, name, data, global_pop, bili):
        with tf.variable_scope(name):
            self.name = name
            self.max_fit = 0.0
            self.fit_val = 0.0
            self.dr = 0.0
            self.bili = bili
            self.DNA_size = data.getDNA_size()
            # self.mean_params, self.cov_params, self.mean, self.cov = self._creat_net(name, data.getDNA_size())
            with tf.variable_scope('mean'):
                self.mean = tf.Variable(tf.truncated_normal([self.DNA_size, ], stddev=0.05, mean=0.5),
                                        dtype=tf.float32, name=name + '_mean')
            with tf.variable_scope('cov'):
                self.cov = tf.Variable(1.0 * tf.eye(self.DNA_size), dtype=tf.float32, name=name + '_cov')
            self.mvn = MultivariateNormalFullCovariance(loc=self.mean, covariance_matrix=abs(self.cov))
            self.make_kid = self.mvn.sample(N_POP)
            self.tfkids_fit = tf.placeholder(tf.float32, [math.floor(N_POP / Factor), ])
            self.tfkids = tf.placeholder(tf.float32, [math.floor(N_POP / Factor), self.DNA_size])
            # self.loss = -tf.reduce_mean(
            #     self.mvn.log_prob(self.tfkids) * self.tfkids_fit + 0.01 * self.mvn.log_prob(
            #         self.tfkids) * self.mvn.prob(
            #         self.tfkids))
            # self.loss = -tf.reduce_mean(self.mvn.log_prob(self.tfkids) * 0.04 * (self.tfkids_fit ** 3))
            self.loss = -tf.reduce_mean(
                self.mvn.log_prob(self.tfkids) * self.tfkids_fit)
            self.train_op = tf.train.AdamOptimizer().minimize(self.loss)
            # self.train_op = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            #     self.loss)  # compute and apply gradients for mean and cov
            self.mean_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/mean')
            self.cov_params = tf.get_collection(
                tf.GraphKeys.TRAINABLE_VARIABLES, scope=name + '/cov')
            with tf.name_scope('pull'):
                self.pull_mean_op = self.mean.assign(global_pop.mean)
                self.pull_cov_op = self.cov.assign(global_pop.cov)
                # self.pull_mean_params_op = [l_p.assign(g_p) for l_p, g_p in
                #                             zip(self.mean_params, global_pop.mean_params)]
                # self.pull_cov_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.cov_params, global_pop.cov_params)]
            with tf.name_scope('push'):
                self.push_mean_op = global_pop.mean.assign(self.mean)
                self.push_cov_op = global_pop.cov.assign(self.cov)
                # self.push_mean_params_op = [g_p.assign(l_p) for g_p, l_p in
                #                             zip(global_pop.mean_params, self.mean_params)]
                # self.push_cov_params_op = [g_p.assign(l_p) for g_p, l_p in zip(global_pop.cov_params, self.cov_params)]
            with tf.name_scope('restart'):
                self.re_mean_op = self.mean.assign(
                    tf.truncated_normal([self.DNA_size, ], stddev=0.05, mean=0.5))  # fresh random draw on each restart
                self.re_cov_op = self.cov.assign(1.0 * tf.eye(self.DNA_size))

    def _update_net(self):
        sess.run([self.push_mean_op, self.push_cov_op])
        # lock_push.acquire()
        # self.push_mean_params_op = [g_p.assign(l_p) for g_p, l_p in zip(global_pop.mean_params, self.mean_params)]
        # self.push_cov_params_op = [g_p.assign(l_p) for g_p, l_p in zip(global_pop.cov_params, self.cov_params)]
        # sess.run([self.push_mean_params_op, self.push_cov_params_op])
        # self.update_mean = self.train_op.apply_gradients(zip(self.mean_grads, global_pop.mean_params))
        # self.update_cov = self.train_op.apply_gradients(zip(self.cov_grads, global_pop.cov_params))
        # sess.run([self.update_mean, self.update_cov])  # local grads applies to global net
        # lock_push.release()

    def _pull_net(self):
        sess.run([self.pull_mean_op, self.pull_cov_op])
        # lock_pull.acquire()
        # self.pull_mean_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.mean_params, global_pop.mean_params)]
        # self.pull_cov_params_op = [l_p.assign(g_p) for l_p, g_p in zip(self.cov_params, global_pop.cov_params)]
        # sess.run([self.pull_mean_params_op, self.pull_cov_params_op])
        # lock_pull.release()

    def _restart_net(self):
        sess.run([self.re_mean_op, self.re_cov_op])

    def getMaxfit(self):
        # self.fit_val = (1 - self.bili) * self.fit_val + self.max_fit
        return self.max_fit

    def setMaxfit(self, fit):
        # self.fit_val = (1 - self.bili) * self.fit_val + fit
        if fit > self.max_fit:
            self.max_fit = fit

    def getFit_val(self):
        return self.fit_val

    def setFit_val(self, fit_list):
        fit_list.sort(reverse=True)
        len_val = math.floor(0.2 * len(fit_list))
        fits_val = 0.0
        for i in range(len_val):
            fits_val = fits_val + 0.2 * fit_list[i] * math.exp(-(i / 2))
        # max_pop_fit = 0.0
        # for i in fit_list:
        #     if i > max_pop_fit:
        #         max_pop_fit = i
        self.fit_val = 0.5 * self.fit_val + 0.5 * fits_val

    def getDr(self):
        return self.dr

    def setDr(self, dr):
        self.dr = dr

    def getMvn(self):
        return self.mvn

    def setMvn(self, mvn):
        self.mvn = mvn  # store the distribution directly; a distribution object is not a tf.Variable

    def getMean(self):
        return self.mean

    def getCov(self):
        return self.cov
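setFit_val smooths each population's score: it keeps the top 20% of a generation's fitness values, weights them by 0.2 * exp(-i / 2), and folds the sum into a running average with factor 0.5. A small worked example (the numbers are illustrative only):

# Illustrative only: reproducing setFit_val's aggregation by hand.
import math
fit_list = [9.0, 7.5, 6.0, 3.0, 2.0, 1.0, 0.5, 0.2, 0.1, 0.0]
fit_list.sort(reverse=True)
len_val = math.floor(0.2 * len(fit_list))      # top 20% of 10 values -> 2
fits_val = sum(0.2 * fit_list[i] * math.exp(-i / 2) for i in range(len_val))
print(fits_val)                                # 0.2*9.0 + 0.2*7.5*e**-0.5 ≈ 2.71
# Worker_pop then updates: fit_val <- 0.5 * fit_val + 0.5 * fits_val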
Example #6
mean = tf.Variable(tf.truncated_normal([DNA_SIZE, ], stddev=0.02, mean=0.5),
                   dtype=tf.float32)
cov = tf.Variable(tf.eye(DNA_SIZE), dtype=tf.float32)
mvn = MultivariateNormalFullCovariance(
    loc=mean,
    covariance_matrix=abs(cov + 0.001 * tf.eye(DNA_SIZE)))  # small jitter keeps cov invertible
make_kid = mvn.sample(N_POP)

# ====================== compute gradient and update mean and covariance matrix from sample and fitness
tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
loss = -tf.reduce_mean(
    mvn.log_prob(tfkids) * tfkids_fit +
    0.01 * mvn.log_prob(tfkids) * mvn.prob(tfkids))  # log prob * fitness
# print(0.01 * mvn.log_prob(tfkids) * mvn.prob(tfkids))
train_op = tf.train.GradientDescentOptimizer(LR).minimize(
    loss)  # compute and apply gradients for mean and cov

sess = tf.Session()
sess.run(tf.global_variables_initializer())

max_fit = 0
dr = 0
for g in range(N_GENERATION):
    if g % 10 == 0:
        LR = LR * 0.9  # decay the learning rate every 10 generations
    kids = sess.run(make_kid)
    kids_fit = []
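    # presumed continuation of the truncated loop (assumption: each kid is
    # scored with a fitness function like get_fitness below, then the whole
    # batch feeds train_op, matching the placeholder shapes defined above)
    for kid in kids:
        kids_fit.append(get_fitness(kid[np.newaxis, :])[0])
    sess.run(train_op, {tfkids: kids, tfkids_fit: np.array(kids_fit)})
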
def get_fitness(pred):
    return -((pred[:, 0])**2 + pred[:, 1]**2)


mean = tf.Variable(tf.random_normal([2, ], 13., 1.), dtype=tf.float32)
cov = tf.Variable(5. * tf.eye(DNA_SIZE), dtype=tf.float32)
mvn = MultivariateNormalFullCovariance(loc=mean, covariance_matrix=cov)
make_kid = mvn.sample(N_POP)

tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
loss = -tf.reduce_mean(mvn.log_prob(tfkids) * tfkids_fit)
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

n = 300
x = np.linspace(-20, 20, n)
X, Y = np.meshgrid(x, x)
Z = np.zeros_like(X)
for i in range(n):
    for j in range(n):
        Z[i, j] = get_fitness(np.array([[x[i], x[j]]]))
plt.contourf(X, Y, -Z, 100, cmap=plt.cm.rainbow)
plt.ylim(-20, 20)
plt.xlim(-20, 20)
The initial cov is tf.eye(DNA_SIZE = 2), a diagonal matrix, so the two random
variables X1 and X2 are uncorrelated (and, being jointly Gaussian, in fact
independent).
normal_dist.sample(POP_SIZE=20) = [x1_1    x1_2
                                   x2_1    x2_2
                                   ...     ...
                                   x20_1   x20_2]
normal_dist.sample(POP_SIZE=20) draws 20 samples of (X1, X2) from the
distribution defined by the mean and cov above; those 20 samples are the
training data fed to the network.
The loss is then differentiated with respect to mean and cov.
'''
mean = tf.Variable(tf.random_normal([DNA_SIZE, ], 5., 1.), dtype=tf.float32, name='mean')
cov = tf.Variable(3. * tf.eye(DNA_SIZE), dtype=tf.float32, name='cov')
normal_dist = MultivariateNormalFullCovariance(loc=mean, covariance_matrix=cov)
make_child = normal_dist.sample(POP_SIZE)  # sample data from the normal_dist defined above

childs_fitness_input = tf.placeholder(tf.float32, [POP_SIZE, ])
childs_input = tf.placeholder(tf.float32, [POP_SIZE, DNA_SIZE])
loss = -tf.reduce_mean(normal_dist.log_prob(childs_input) * childs_fitness_input)  # log prob * fitness
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
#-------------------------- build the network --------------------------#


# draw the contour plot
contour()

# start training the network
for generation in range(N_GENERATION):

    childs = sess.run(make_child)
    childs_fitness = get_fitness(childs)
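    # presumed continuation: feed the sampled children and their fitness back
    # through train_op to update mean and cov
    sess.run(train_op, {childs_input: childs,
                        childs_fitness_input: childs_fitness})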

# build multivariate distribution
mean = tf.Variable(tf.random_normal([2, ], 13., 1.), dtype=tf.float32)
cov = tf.Variable(5. * tf.eye(DNA_SIZE), dtype=tf.float32)
mvn = MultivariateNormalFullCovariance(
    loc=mean, covariance_matrix=cov)  # start build our model
make_kid = mvn.sample(N_POP)  # sampling operation

# compute gradient and update mean and covariance matrix from sample and fitness
tfkids_fit = tf.placeholder(tf.float32, [N_POP])
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
loss = -tf.reduce_mean(mvn.log_prob(tfkids) * tfkids_fit)  # log prob * fitness (the objective we minimize)
train_op = tf.train.GradientDescentOptimizer(LR).minimize(
    loss)  # compute and apply gradients for mean and cov

sess = tf.Session()
sess.run(tf.global_variables_initializer())  # initialize tf variables

# something about plotting (can be ignored)
n = 300
x = np.linspace(-20, 20, n)
X, Y = np.meshgrid(x, x)
Z = np.zeros_like(X)
for i in range(n):
    for j in range(n):
        Z[i, j] = get_fitness(np.array([[x[i], x[j]]]))
LR = 0.02            # learning rate


# fitness function
def get_fitness(pred): return -((pred[:, 0])**2 + pred[:, 1]**2)

# build multivariate distribution
mean = tf.Variable(tf.random_normal([2, ], 13., 1.), dtype=tf.float32)
cov = tf.Variable(5. * tf.eye(DNA_SIZE), dtype=tf.float32)
mvn = MultivariateNormalFullCovariance(loc=mean, covariance_matrix=cov)
make_kid = mvn.sample(N_POP)                                    # sampling operation

# compute gradient and update mean and covariance matrix from sample and fitness
tfkids_fit = tf.placeholder(tf.float32, [N_POP, ])
tfkids = tf.placeholder(tf.float32, [N_POP, DNA_SIZE])
loss = -tf.reduce_mean(mvn.log_prob(tfkids)*tfkids_fit)         # log prob * fitness
train_op = tf.train.GradientDescentOptimizer(LR).minimize(loss) # compute and apply gradients for mean and cov

sess = tf.Session()
sess.run(tf.global_variables_initializer())                     # initialize tf variables

# something about plotting (can be ignored)
n = 300
x = np.linspace(-20, 20, n)
X, Y = np.meshgrid(x, x)
Z = np.zeros_like(X)
for i in range(n):
    for j in range(n):
        Z[i, j] = get_fitness(np.array([[x[i], x[j]]]))
plt.contourf(X, Y, -Z, 100, cmap=plt.cm.rainbow)
plt.ylim(-20, 20)
plt.xlim(-20, 20)
plt.ion()
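
The last excerpt stops right after the plotting setup. A presumed continuation, following the same loop pattern as the other snippets on this page (the scatter-plot bookkeeping is an assumption):

# Presumed training loop for the snippet above.
for g in range(N_GENERATION):
    kids = sess.run(make_kid)                                   # sample offspring
    kids_fit = get_fitness(kids)                                # evaluate them
    sess.run(train_op, {tfkids_fit: kids_fit, tfkids: kids})    # update mean and cov

    # plotting update (can be ignored)
    if 'sca' in globals():
        sca.remove()
    sca = plt.scatter(kids[:, 0], kids[:, 1], s=30, c='k')
    plt.pause(0.01)

plt.ioff()
plt.show()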