Example #1
    def run(self):

        self.env = gym.make(self.args.task)
        self.env.seed(randint(0, 999999))
        if self.monitor:
            self.env.monitor.start('monitor/', force=True)

        # tensorflow variables (same as in model.py)
        self.observation_size = self.env.observation_space.shape[0]
        self.action_size = np.prod(self.env.action_space.shape)
        self.hidden_size = 64
        weight_init = tf.random_uniform_initializer(-0.05, 0.05)
        bias_init = tf.constant_initializer(0)

        # tensorflow model of the policy
        self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
        self.debug = tf.constant([2, 2])
        with tf.variable_scope("policy-a"):
            h1 = fully_connected(self.obs, self.observation_size, self.hidden_size, weight_init, bias_init, "policy_h1")
            h1 = tf.nn.relu(h1)
            h2 = fully_connected(h1, self.hidden_size, self.hidden_size, weight_init, bias_init, "policy_h2")
            h2 = tf.nn.relu(h2)
            h3 = fully_connected(h2, self.hidden_size, self.action_size, weight_init, bias_init, "policy_h3")
            action_dist_logstd_param = tf.Variable((.01*np.random.randn(1, self.action_size)).astype(np.float32), name="policy_logstd")
        self.action_dist_mu = h3
        self.action_dist_logstd = tf.tile(action_dist_logstd_param, tf.stack((tf.shape(self.action_dist_mu)[0], 1)))

        config = tf.ConfigProto(device_count={'GPU': 0})
        self.session = tf.Session(config=config)
        self.session.run(tf.global_variables_initializer())
        var_list = tf.trainable_variables()

        self.set_policy = SetPolicyWeights(self.session, var_list)

        while True:
            # get a task, or wait until it gets one
            next_task = self.task_q.get(block=True)
            if next_task == 1:
                # the task is an actor request to collect experience
                path = self.rollout()
                self.task_q.task_done()
                self.result_q.put(path)
            elif next_task == 2:
                print("kill message")
                if self.monitor:
                    self.env.monitor.close()
                self.task_q.task_done()
                break
            else:
                # the task is to set parameters of the actor policy
                self.set_policy(next_task)
                # Hacky: when the queue is filled with set-parameter tasks,
                # sleep so this actor doesn't finish updating and grab a
                # second task before the other actors accept their own.
                time.sleep(0.1)
                self.task_q.task_done()
        return
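The policy examples in this listing call a six-argument fully_connected helper. A minimal sketch of what that helper presumably looks like (an affine layer built under a variable scope; the exact signature is an assumption):

def fully_connected(input_layer, input_size, output_size, weight_init,
                    bias_init, scope):
    # assumed helper: matmul plus bias with explicitly sized weights
    with tf.variable_scope(scope):
        w = tf.get_variable("w", [input_size, output_size],
                            initializer=weight_init)
        b = tf.get_variable("b", [output_size], initializer=bias_init)
    return tf.matmul(input_layer, w) + b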
Example #2
    def create_net(self, shape):
        hidden_size = 64
        print(shape)
        self.x = tf.placeholder(tf.float32, shape=[None, shape], name="x")
        self.y = tf.placeholder(tf.float32, shape=[None], name="y")

        weight_init = tf.random_uniform_initializer(-0.05, 0.05)
        bias_init = tf.constant_initializer(0)

        with tf.variable_scope("VF"):
            h1 = tf.nn.relu(
                utils.fully_connected(self.x, shape, hidden_size, weight_init,
                                      bias_init, "h1"))
            h2 = tf.nn.relu(
                utils.fully_connected(h1, hidden_size, hidden_size,
                                      weight_init, bias_init, "h2"))
            h3 = utils.fully_connected(h2, hidden_size, 1, weight_init,
                                       bias_init, "h3")
        self.net = tf.reshape(h3, (-1, ))
        l2 = tf.nn.l2_loss(self.net - self.y)
        self.train = tf.train.AdamOptimizer().minimize(l2)
        self.session.run(tf.global_variables_initializer())
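create_net only builds the graph; a hedged sketch of the fit/predict companions such a value-function class usually pairs with (the method names and the 50-step inner loop are assumptions):

    def fit(self, features, returns):
        # run a few Adam steps on the L2 regression loss built above
        for _ in range(50):
            self.session.run(self.train,
                             feed_dict={self.x: features, self.y: returns})

    def predict(self, features):
        # predicted state values for a batch of feature rows
        return self.session.run(self.net, feed_dict={self.x: features})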
Example #3
    def build_graph(self):
        self.image_latent = image_encoder(self.image_ph, [128, 512],
                                          self.keep_prob_ph,
                                          training=self.training_ph)

        self.text_latent = text_encoder(self.text_ph, [128, 512],
                                        self.keep_prob_ph,
                                        training=self.training_ph)

        # fuse the two modality encodings into the joint hash code
        fuse_latent = tf.concat([self.image_latent, self.text_latent], axis=1)
        dense_latent = fully_connected(fuse_latent, 512, 'dense_latent')
        coding_layer = fully_connected(dense_latent,
                                       self.latent_len,
                                       'coding_layer',
                                       activation=None)

        self.fuse_hashcode = tf.tanh(
            self.alpha * coding_layer) + 0.001 * tf.norm((1 / self.alpha))**2

        self.image_tilde = image_decoder(self.fuse_hashcode,
                                         [128, self.input_len_img],
                                         self.keep_prob_ph,
                                         training=self.training_ph)

        self.text_tilde = text_decoder(self.fuse_hashcode,
                                       [128, self.input_len_txt],
                                       self.keep_prob_ph,
                                       training=self.training_ph)

        self.recon_image_loss = tf.reduce_mean(
            tf.square(self.image_ph - self.image_tilde))
        self.recon_text_loss = tf.reduce_mean(
            tf.square(self.text_ph - self.text_tilde))

        self._classify_vars()
        self._init_summary()
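Note that build_graph calls fully_connected with a different signature than the helper used in Examples #1 and #2: (inputs, units, name, activation). A plausible wrapper, assuming it delegates to tf.layers.dense with ReLU as the default activation:

def fully_connected(inputs, units, name, activation=tf.nn.relu):
    # assumed thin wrapper; the ReLU default matches how the
    # coding_layer call passes activation=None to disable it
    return tf.layers.dense(inputs, units, activation=activation, name=name)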
Example #4
    def inference(self, images):
        print('================== Resnet structure =======================')
        print('num_residual_units: ', self.num_residual_units)
        print('channels in each block: ', self.filters)
        print('stride in each block: ', self.strides)
        print('================== constructing network ====================')

        x = utils.input_data(images, self.data_format)
        x = tf.cast(x, self.float_type)

        print('shape input: ', x.get_shape())
        with tf.variable_scope('conv1'):
            trainable_ = False if self.fix_blocks > 0 else True
            self.fix_blocks -= 1
            x = utils.conv2d_same(x,
                                  64,
                                  7,
                                  2,
                                  trainable=trainable_,
                                  data_format=self.data_format,
                                  initializer=self.initializer,
                                  float_type=self.float_type)
            x = utils.batch_norm('BatchNorm',
                                 x,
                                 trainable_,
                                 self.data_format,
                                 self.mode,
                                 use_gamma=self.bn_use_gamma,
                                 use_beta=self.bn_use_beta,
                                 bn_epsilon=self.bn_epsilon,
                                 bn_ema=self.bn_ema,
                                 float_type=self.float_type)
            x = utils.relu(x)
            x = utils.max_pool(x, 3, 2, self.data_format)
        print('shape after pool1: ', x.get_shape())

        for block_index in range(len(self.num_residual_units)):
            for unit_index in range(self.num_residual_units[block_index]):
                with tf.variable_scope('block%d' % (block_index + 1)):
                    with tf.variable_scope('unit_%d' % (unit_index + 1)):
                        stride = 1
                        if unit_index == self.num_residual_units[
                                block_index] - 1:
                            stride = self.strides[block_index]

                        trainable_ = False if self.fix_blocks > 0 else True
                        self.fix_blocks -= 1
                        x = utils.bottleneck_residual(
                            x,
                            self.filters[block_index],
                            stride,
                            data_format=self.data_format,
                            initializer=self.initializer,
                            rate=self.rate[block_index],
                            trainable=trainable_,
                            bn_mode=self.mode,
                            bn_use_gamma=self.bn_use_gamma,
                            bn_use_beta=self.bn_use_beta,
                            bn_epsilon=self.bn_epsilon,
                            bn_ema=self.bn_ema,
                            float_type=self.float_type)
            print('shape after block %d: ' % (block_index + 1), x.get_shape())

        with tf.variable_scope('logits'):
            x = utils.global_avg_pool(x, self.data_format)
            self.logits = utils.fully_connected(x,
                                                self.num_classes,
                                                trainable=True,
                                                data_format=self.data_format,
                                                initializer=self.initializer,
                                                float_type=self.float_type)
            self.logits = tf.reshape(self.logits, (-1, self.num_classes))
            self.predictions = tf.nn.softmax(self.logits)

        print('================== network constructed ====================')
        return self.logits
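A hedged usage sketch: the returned logits feed a standard softmax cross-entropy loss (the model instance and the labels placeholder are assumptions, not part of the example):

labels = tf.placeholder(tf.int64, [None])
logits = model.inference(images)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,
                                                   logits=logits))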
Example #5
    pickle.dump(data, pickle_out)

    # Save a picture of the plotted data
    plot_data(train_data, train_label, 'data', test_data, test_label)
    optimizer = keras.optimizers.RMSprop(lr=0.01, decay=0)

    # Generate model and train on just the training data.
    n_hidden_layers = 5
    n_neurons_per_layer = 25
    good_model_params = {
        "n_hidden_layers": n_hidden_layers,
        "n_neurons_per_layer": n_neurons_per_layer,
        "n_points": n_points,
        "seed_value": seed_value
    }
    model = fully_connected(n_hidden_layers, n_neurons_per_layer)

    model.compile(optimizer=optimizer,
                  loss=binary_crossentropy,
                  metrics=['binary_accuracy'])
    # Create directory and callbacks to save model+checkpoints
    params_path = os.path.join(directory, "good_model_params")
    pickle_out = open(params_path, "wb")
    pickle.dump(good_model_params, pickle_out)
    pickle_out.close()
    filepath = os.path.join(directory, "good_model_{epoch}.hdf5")
    checkpoint = keras.callbacks.ModelCheckpoint(filepath,
                                                 verbose=0,
                                                 save_best_only=False,
                                                 save_weights_only=True)
    tensorboard_callback = keras.callbacks.TensorBoard(histogram_freq=1)
    callbacks_list = [checkpoint, tensorboard_callback]
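callbacks_list is presumably handed to fit() right after this point; a sketch with an assumed epoch count:

history = model.fit(train_data, train_label,
                    epochs=200,  # assumed; not shown in the example
                    validation_data=(test_data, test_label),
                    callbacks=callbacks_list)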
Example #6
H = X
"""
# Number of filters and layers of the CNN
n_filters = [3, 2, 1]
for layer_i, n_filters_i in enumerate(n_filters):
    H, W = conv2d(H, n_filters_i, k_h=3, k_w=3, d_h=1, d_w=1, name=str(layer_i))
    H = tf.nn.relu(H)
    if layer_i % 2 == 1:
        H = tf.layers.max_pooling2d(H, pool_size=(2, 2), strides=(1, 1), padding='SAME', name=str(layer_i))
"""

# Number of filters and layers of the FCN
layers = [100, 100, 4]
for layer_i, n_output_i in enumerate(layers):
    H, W = fully_connected(H, n_output=n_output_i, name=str(layer_i))
    if layer_i == len(layers) - 1:
        H = tf.nn.softmax(H)
    else:
        H = tf.nn.relu(H)

Y_predicted = H

# Cost function
loss = binary_cross_entropy(Y_predicted, Y)
cost = tf.reduce_mean(tf.reduce_sum(loss, 1))

# Measure of accuracy
predicted_y = tf.argmax(Y_predicted, 1)
actual_y = tf.argmax(Y, 1)
correct_prediction = tf.equal(predicted_y, actual_y)
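A common continuation (a sketch, not part of the original snippet): reduce the boolean correctness vector to an accuracy scalar and attach an optimizer to the cost above:

accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
train_step = tf.train.AdamOptimizer(learning_rate=1e-3).minimize(cost)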
Example #7
    def make_model(self):
        self.observation_size = self.observation_space.shape[0]
        self.action_size = np.prod(self.action_space.shape)
        self.hidden_size = 64

        weight_init = tf.random_uniform_initializer(-0.05, 0.05)
        bias_init = tf.constant_initializer(0)

        config = tf.ConfigProto(device_count={'GPU': 0})
        self.session = tf.Session(config=config)

        self.obs = tf.placeholder(tf.float32, [None, self.observation_size])
        self.action = tf.placeholder(tf.float32, [None, self.action_size])
        self.advantage = tf.placeholder(tf.float32, [None])
        self.oldaction_dist_mu = tf.placeholder(tf.float32,
                                                [None, self.action_size])
        self.oldaction_dist_logstd = tf.placeholder(tf.float32,
                                                    [None, self.action_size])

        with tf.variable_scope("policy"):
            h1 = utils.fully_connected(self.obs, self.observation_size,
                                       self.hidden_size, weight_init,
                                       bias_init, "policy_h1")
            h1 = tf.nn.relu(h1)
            h2 = utils.fully_connected(h1, self.hidden_size, self.hidden_size,
                                       weight_init, bias_init, "policy_h2")
            h2 = tf.nn.relu(h2)
            h3 = utils.fully_connected(h2, self.hidden_size, self.action_size,
                                       weight_init, bias_init, "policy_h3")
            action_dist_logstd_param = tf.Variable(
                (.01 * np.random.randn(1, self.action_size)).astype(
                    np.float32),
                name="policy_logstd")
        # means for each action
        self.action_dist_mu = h3
        # log standard deviations for each action
        self.action_dist_logstd = tf.tile(
            action_dist_logstd_param,
            tf.stack((tf.shape(self.action_dist_mu)[0], 1)))

        batch_size = tf.shape(self.obs)[0]
        # what are the probabilities of taking self.action, given new and old distributions
        log_p_n = utils.gauss_log_prob(self.action_dist_mu,
                                       self.action_dist_logstd, self.action)
        log_oldp_n = utils.gauss_log_prob(self.oldaction_dist_mu,
                                          self.oldaction_dist_logstd,
                                          self.action)

        # tf.exp(log_p_n) / tf.exp(log_oldp_n)
        ratio = tf.exp(log_p_n - log_oldp_n)

        # importance sampling of surrogate loss (L in paper)
        surr = -tf.reduce_mean(ratio * self.advantage)
        var_list = tf.trainable_variables()

        batch_size_float = tf.cast(batch_size, tf.float32)
        # kl divergence and shannon entropy
        kl = utils.gauss_KL(self.oldaction_dist_mu, self.oldaction_dist_logstd,
                            self.action_dist_mu,
                            self.action_dist_logstd) / batch_size_float
        ent = utils.gauss_ent(self.action_dist_mu,
                              self.action_dist_logstd) / batch_size_float

        self.losses = [surr, kl, ent]
        # policy gradient
        self.pg = utils.flatgrad(surr, var_list)

        # KL divergence w/ itself, with first argument kept constant.
        kl_firstfixed = utils.gauss_selfKL_firstfixed(
            self.action_dist_mu, self.action_dist_logstd) / batch_size_float
        # gradient of KL w/ itself
        grads = tf.gradients(kl_firstfixed, var_list)
        # what vector we're multiplying by
        self.flat_tangent = tf.placeholder(tf.float32, [None])
        shapes = [utils.var_shape(v) for v in var_list]
        start = 0
        tangents = []
        for shape in shapes:
            size = np.prod(shape)
            param = tf.reshape(self.flat_tangent[start:(start + size)], shape)
            tangents.append(param)
            start += size

        # gradient of KL w/ itself * tangent
        gvp = [tf.reduce_sum(g * t) for (g, t) in zip(grads, tangents)]
        # 2nd gradient of KL w/ itself * tangent
        self.fvp = utils.flatgrad(gvp, var_list)
        # the actual parameter values
        self.gf = utils.GetFlat(self.session, var_list)
        # call this to set parameter values
        self.sff = utils.SetFromFlat(self.session, var_list)
        self.session.run(tf.global_variables_initializer())
        # value function
        # self.vf = VF(self.session)
        self.vf = LinearVF()

        self.get_policy = utils.GetPolicyWeights(self.session, var_list)
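self.fvp exists so TRPO can form Fisher-vector products F v without materializing F; the usual consumer is a conjugate-gradient solve of F x = g for the natural gradient step. A minimal NumPy sketch of that solver (function and parameter names are assumptions):

def conjugate_gradient(fvp_fn, g, iters=10, residual_tol=1e-10):
    # solve F x = g given only matrix-vector products fvp_fn(v) = F v
    x = np.zeros_like(g)
    r = g.copy()
    p = g.copy()
    rdotr = r.dot(r)
    for _ in range(iters):
        z = fvp_fn(p)
        alpha = rdotr / p.dot(z)
        x += alpha * p
        r -= alpha * z
        new_rdotr = r.dot(r)
        p = r + (new_rdotr / rdotr) * p
        rdotr = new_rdotr
        if rdotr < residual_tol:
            break
    return x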