Example No. 1
 def __init__(self, nHidden, seqLen, guidence, newNet):
     self.nHidden = nHidden
     self.seqLen = seqLen
     tmp = self.getEmbedding()
     self.embedding = tf.Variable(tmp)
     with tf.variable_scope("training_variable"):
         # Attention weight matrices for the text branch, drawn from a truncated normal
         self.weights = {
             "ATT":
             tf.Variable(
                 tf.truncated_normal(shape=[2 * self.nHidden, self.nHidden],
                                     stddev=0.08,
                                     name="text_att")),
             "ATTG":
             tf.Variable(
                 tf.truncated_normal(shape=[200, self.nHidden],
                                     stddev=0.08,
                                     name="text_att2")),
             "ATTS":
             tf.Variable(
                 tf.truncated_normal(shape=[self.nHidden, 1],
                                     stddev=0.08,
                                     name="text_att3")),
             "Fw1":
             tf.Variable(
                 tf.truncated_normal(shape=[200, self.nHidden],
                                     stddev=0.08,
                                     name="init_fw1")),
             "Fw2":
             tf.Variable(
                 tf.truncated_normal(shape=[200, self.nHidden],
                                     stddev=0.08,
                                     name="init_fw2")),
             "Bw1":
             tf.Variable(
                 tf.truncated_normal(shape=[200, self.nHidden],
                                     stddev=0.08,
                                     name="init_bw1")),
             "Bw2":
             tf.Variable(
                 tf.truncated_normal(shape=[200, self.nHidden],
                                     stddev=0.08,
                                     name="init_bw2")),
         }
         self.biases = {
             "Fw1":
             tf.Variable(
                 tf.constant(0.01, shape=[self.nHidden], name="init_Fw1")),
             "Fw2":
             tf.Variable(
                 tf.constant(0.01, shape=[self.nHidden], name="init_Fw2")),
             "Bw1":
             tf.Variable(
                 tf.constant(0.01, shape=[self.nHidden], name="init_Bw1")),
             "Bw2":
             tf.Variable(
                 tf.constant(0.01, shape=[self.nHidden], name="init_Bw2")),
         }
     self.X = tf.placeholder(tf.int32, [None, self.seqLen])
     self.pKeep = tf.placeholder(tf.float32)
     self.build(guidence, newNet)
Example No. 2
def linear_regression():
    x_train = np.asarray([1, 2, 3, 4, 5, 6, 7, 8, 9, 11])
    y_train = np.asarray([0.1, 0.2, 0.32, 0.43, 0.54, 0.65, 0.77, 0.88, 0.94, 1])
    n_sample = x_train.shape[0]
    x_ = tf.placeholder(tf.float32, name="x")
    y_ = tf.placeholder(tf.float32, name="y")
    w = tf.get_variable("weights", initializer=tf.constant(0.0))
    b = tf.get_variable("bias", initializer=tf.constant(0.0))
    y_predict = w * x_ + b
    loss = tf.square(y_ - y_predict, name='loss')
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)
    # Dump the graph definition for TensorBoard, then close the writer.
    writer = tf.summary.FileWriter("./graphs", tf.get_default_graph())
    writer.close()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(100):
            total_loss = 0
            for x, y in zip(x_train, y_train):
                _, _loss = sess.run([optimizer, loss], feed_dict={x_: x, y_: y})
                total_loss += _loss
            print(f"Epoch {i}: {total_loss / n_sample}")
        w_out, b_out = sess.run([w, b])
        y_predict = x_train * w_out + b_out
        for i, j in zip(y_predict, y_train):
            print(f"{i} : {j}")
        plt.plot(x_train, y_predict, "r-", label="predict")
        plt.plot(x_train, y_train, "go", label="data")
        plt.title("ABC")
        plt.xlabel("x")
        plt.ylabel("y")
        plt.show()
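A note on running this snippet: it assumes the TF1-style compat API plus NumPy and Matplotlib. A minimal preamble, purely as an assumption (Example No. 14 below uses a similar compat-v1 import), would be:

# Assumed preamble; not part of the original snippet.
import numpy as np
import matplotlib.pyplot as plt
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()  # keeps tf.placeholder / tf.Session working on TF 2.x installs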
Example No. 3
    def _build_net(self):
        # Building the structure of neural network.
        def build_layer(s, c_names, n_l1, n_l2, w_initializer, b_initializer):
            with tf.variable_scope('l1'):
                w1 = tf.get_variable('w1', [self.n_features, n_l1],
                                     initializer=w_initializer,
                                     collections=c_names)
                b1 = tf.get_variable('b1', [1, n_l1],
                                     initializer=b_initializer,
                                     collections=c_names)
                l1 = tf.nn.relu(tf.matmul(s, w1) + b1)
            with tf.variable_scope('l2'):
                w2 = tf.get_variable('w2', [n_l1, n_l2],
                                     initializer=w_initializer,
                                     collections=c_names)
                b2 = tf.get_variable('b2', [1, n_l2],
                                     initializer=b_initializer,
                                     collections=c_names)
                l2 = tf.nn.relu(tf.matmul(l1, w2) + b2)
            with tf.variable_scope('l3'):
                w3 = tf.get_variable('w3', [n_l2, self.n_actions],
                                     initializer=w_initializer,
                                     collections=c_names)
                b3 = tf.get_variable('b3', [1, self.n_actions],
                                     initializer=b_initializer,
                                     collections=c_names)
                l3 = tf.nn.relu(tf.matmul(l2, w3) + b3)  # note: ReLU here constrains the Q-value outputs to be non-negative
            return l3

        # Building the evaluate net

        self.state = tf.placeholder(tf.float32, [None, self.n_features],
                                    name='state')
        self.q_target = tf.placeholder(tf.float32, [None, self.n_actions],
                                       name='q_target')  # target Q-values fed in during training

        with tf.variable_scope('eval_net'):
            c_names = ['eval_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            n_l1, n_l2 = 64, 64
            w_initializer = tf.random_normal_initializer(0.0, 0.3)
            b_initializer = tf.random_normal_initializer(0.0, 0.3)
            self.q_eval = build_layer(self.state, c_names, n_l1, n_l2,
                                      w_initializer, b_initializer)

        with tf.variable_scope('loss'):
            self.loss = tf.reduce_mean(
                tf.squared_difference(self.q_target, self.q_eval))
        with tf.variable_scope('train'):
            self._train_op = tf.train.RMSPropOptimizer(
                self.learning_rate).minimize(self.loss)
        # Building the target net.
        self.state_ = tf.placeholder(tf.float32, [None, self.n_features],
                                     name='state_')
        with tf.variable_scope('target_net'):
            c_names = ['target_net_params', tf.GraphKeys.GLOBAL_VARIABLES]
            self.q_next = build_layer(self.state_, c_names, n_l1, n_l2,
                                      w_initializer, b_initializer)
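The snippet registers its variables in the 'eval_net_params' and 'target_net_params' collections but does not show the target-network update. A minimal sketch of how those collections are typically used to sync the target net (an assumption; the original class likely does this elsewhere):

# Hypothetical sync op built from the collections defined in _build_net().
t_params = tf.get_collection('target_net_params')
e_params = tf.get_collection('eval_net_params')
replace_target_op = [tf.assign(t, e) for t, e in zip(t_params, e_params)]
# later, inside a session, every N learning steps:
# sess.run(replace_target_op)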
Example No. 4
    def __init__(self,
                 n_state,
                 n_action,
                 learning_rate,
                 gamma,
                 replay_buffer_size=3000,
                 sess: tf.Session = None):
        self.n_state = n_state
        self.n_action = n_action
        self.fai_s_size = 512
        # shape: (state_size,)
        self.w = np.zeros([n_state])
        self.learning_rate = learning_rate
        self.gamma = gamma
        self.replay_buffer = np.zeros(
            [replay_buffer_size, self.n_state * 2 + 2])
        self.memory_size = replay_buffer_size
        self.memory_count = 0
        self.state = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_hat = tf.placeholder(tf.float32, [None, self.n_state])
        self.state_ = tf.placeholder(tf.float32, [None, self.n_state])
        self.rs_p = tf.placeholder(tf.float32, [None, 1])
        if sess is None:
            self.sess = tf.Session()
        else:
            self.sess = sess
        self.eval_collection_name = [
            'eval_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]
        self.target_collection_name = [
            'target_net_collection', tf.GraphKeys.GLOBAL_VARIABLES
        ]

        shutil.rmtree("./log", ignore_errors=True)  # tolerate a missing log dir on first run
        os.mkdir("./log")
        with tf.variable_scope('assign_op'):
            e_params = tf.get_collection('eval_net_collection')
            t_params = tf.get_collection('target_net_collection')
            self.assign_op = [
                tf.assign(t, e) for t, e in zip(t_params, e_params)
            ]
        with tf.variable_scope('eval_net'):
            self.eval_fai, self.eval_s_hat, self.eval_r_s, self.eval_M = self._build_net(
                self.eval_collection_name)
        with tf.variable_scope('target_net'):
            self.target_fai, self.target_s_hat, self.target_r_s, self.target_M = self._build_net(
                self.target_collection_name)
        tf.summary.FileWriter("./log", self.sess.graph)
        self.sess.run(tf.global_variables_initializer())
Example No. 5
    def __init__(self,
                 exp_rate=0.3,
                 lr=0.1,
                 n_steps=5,
                 episodes=1000,
                 sess: tf.Session = None):
        self.maze = DynaQMaze()
        self.actions = self.maze.action_space
        self.n_actions = len(self.actions)
        self.state_actions = []  # state & action transition
        self.exp_rate = exp_rate
        self.lr = lr
        self.steps = n_steps
        self.episodes = episodes  # number of episodes going to play
        self.steps_per_episode = []
        self.state = self.maze.get_current_state()
        self.Q_values = {}
        # model function
        self.model = {}
        self.maze.render()
        if sess is None:
            self.sess = tf.Session()
        else:
            self.sess = sess
        self.writer1 = tf.summary.FileWriter('./log/r-1', self.sess.graph)
        self.writer2 = tf.summary.FileWriter('./log/r-2', self.sess.graph)
        self.tmp_tensor = tf.placeholder(tf.float32)
        self.all_reward_summary = tf.summary.scalar('all_reward',
                                                    self.tmp_tensor)
        self.all_cnt_summary = tf.summary.scalar('all_cnt', self.tmp_tensor)

        self.write_op = tf.summary.merge_all()
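Both scalar summaries above read from the same tmp_tensor placeholder, so write_op can be evaluated by feeding a single value. A short usage sketch (an assumption about how the class logs per-episode metrics; agent, episode_reward, and episode are hypothetical names):

# Hypothetical logging call; `agent` is an instance of the class above.
summary = agent.sess.run(agent.write_op, feed_dict={agent.tmp_tensor: episode_reward})
agent.writer1.add_summary(summary, episode)
agent.writer1.flush()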
Example No. 6
def train(x_train, y_train):
    n_samples, n_features = x_train.shape

    w = tf.Variable(np.random.rand(n_features, 1).astype(dtype='float32'),
                    name="weight")
    b = tf.Variable(0.0, dtype=tf.float32, name="bias")

    x = tf.placeholder(dtype=tf.float32, name='x')
    y = tf.placeholder(dtype=tf.float32, name='y')

    predictions = tf.matmul(x, w) + b
    loss = tf.reduce_mean(
        tf.log(1 + tf.exp(tf.multiply(-1.0 * y, predictions))))

    # optimizer = tf.train.GradientDescentOptimizer(learn_rate).minimize(loss)
    optimizer = tf.train.ProximalGradientDescentOptimizer(
        learning_rate=learn_rate,
        l1_regularization_strength=0.1).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        for epoch in range(n_epochs):
            for idx in range(0, n_samples, batch_size):
                iE = min(n_samples, idx + batch_size)
                x_batch = x_train[idx:iE, :]
                y_batch = y_train[idx:iE, :]
                sess.run([optimizer], feed_dict={x: x_batch, y: y_batch})
            curr_w, curr_b = sess.run([w, b])

            # Shrink each weight toward zero by `threshold` (a crude L1-style shrinkage step)
            for idx in range(len(curr_w)):
                if curr_w[idx] < threshold * -1:
                    curr_w[idx] += threshold
                else:
                    curr_w[idx] -= threshold
            sess.run([tf.assign(w, curr_w)])
    return curr_w, curr_b
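train() refers to module-level names (learn_rate, n_epochs, batch_size, threshold) that are not part of the snippet. Plausible placeholder values, assumed purely so the function can be run standalone, would be:

# Assumed module-level hyperparameters; the original values are not shown.
learn_rate = 0.01
n_epochs = 100
batch_size = 32
threshold = 0.01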
Example No. 7
    def __post_init__(self):
        if self.batch_size is None:
            self.batch_size = 1
        assert self.nsamples % self.batch_size == 0

        self.enc = encoder.get_encoder(self.models_dir, self.model_name)
        self.hparams = model.default_hparams()
        with open(
                os.path.join(self.models_dir, self.model_name, 'hparams.json')) as f:
            self.hparams.override_from_dict(json.load(f))

        if self.length is None:
            self.length = self.hparams.n_ctx // 2
        elif self.length > self.hparams.n_ctx:
            raise ValueError("Can't get samples longer than window size: %s" % self.hparams.n_ctx)

        self.sess = tf.Session()

        # with tf.Session(graph=tf.Graph()) as self.sess:
        self.sess.run(tf.global_variables_initializer())
        self.context = tf.placeholder(tf.int32, [self.batch_size, None])
        np.random.seed(self.seed)
        tf.set_random_seed(self.seed)
        self.output = sample.sample_sequence(
            hparams=self.hparams, length=self.length,
            context=self.context,
            batch_size=self.batch_size,
            temperature=self.temperature, top_k=self.top_k, top_p=self.top_p
        )

        saver = tf.train.Saver()
        print(f"MODEL DIR {self.models_dir}")
        print(f"MODEL NAME {self.model_name}")
        print(f"PWD {os.getcwd()}")
        print(f"MODEL DIR ABS {Path(self.models_dir).absolute()}")
        ckpt = tf.train.latest_checkpoint(
            os.path.join(self.models_dir, self.model_name))
        saver.restore(self.sess, ckpt)
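A short usage sketch (assumed, mirroring the sampling loop in Example No. 12 below): encode a prompt with the encoder, run self.output with the context fed in, and decode everything after the prompt tokens.

# Hypothetical helper; `sampler` is an instance of the class above.
def sample_once(sampler, prompt):
    context_tokens = sampler.enc.encode(prompt)
    out = sampler.sess.run(sampler.output,
                           feed_dict={sampler.context: sampler.batch_size * [context_tokens]})
    return sampler.enc.decode(out[0][len(context_tokens):])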
Example No. 8
 def __init__(self):
     self.embedding = self.getEmb()
     self.embSize = self.embedding.shape[1]
     self.vocabSize = self.embedding.shape[0]
     self.x = tf.placeholder(tf.int32, [None, 5])
     with tf.variable_scope("training_variable"):
         self.weights = {
             "MLP1":
             tf.Variable(
                 tf.truncated_normal(
                     shape=[self.embSize,
                            int(self.embSize / 2)],
                     stddev=0.08)),
             "MLP2":
             tf.Variable(
                 tf.truncated_normal(shape=[int(self.embSize / 2), 1],
                                     stddev=0.08))
         }
         self.biases = {
             "MLP1":
             tf.Variable(
                 tf.constant(0.01,
                             shape=[int(self.embSize / 2)],
                             dtype=tf.float32)),
             "MLP2":
             tf.Variable(tf.constant(0.01, shape=[1], dtype=tf.float32))
         }
     self.inputEmb = tf.nn.embedding_lookup(self.embedding, self.x)
     p1 = tf.matmul(tf.reshape(self.inputEmb, [-1, self.embSize]),
                    self.weights["MLP1"]) + self.biases["MLP1"]
     p1 = tf.matmul(tf.nn.relu(p1),
                    self.weights["MLP2"]) + self.biases["MLP2"]
     p1 = tf.reshape(p1, [-1, 5])
     p1 = tf.reshape(tf.nn.softmax(p1), [-1, 1, 5])
     self.finalState = tf.reshape(tf.matmul(p1, self.inputEmb),
                                  [-1, self.embSize])
Example No. 9
def build_model():

    size = 8  # Single size for easier debugging (for now)
    max_s = [1, 2, 2, 1]  # size of the sliding window for max pooling
    learning_rate = 0.0001

    # frames = tf.placeholder(tf.float32, [None, 256, 256, 5]) # None is the number of samples, rename the variable name later
    frames = tf.placeholder(
        tf.float32, [None, 32, 32, 4], name="frames"
    )  # features: halite_available, others_ship, cargo, self_shipyard
    # can_afford = tf.placeholder(tf.float32, [None, 3])
    turns_left = tf.placeholder(tf.float32, [None, 1], name="turnsleft")
    my_ships = tf.placeholder(tf.float32, [None, 32, 32, 1], name="myships")

    my_ships = tf.cast(my_ships, tf.float32)

    moves = tf.placeholder(tf.uint8, [None, 32, 32, 1], name="moves")
    spawn = tf.placeholder(tf.float32, [None, 1], name="spawn")

    tf.add_to_collection('frames', frames)
    # tf.add_to_collection('can_afford', can_afford)
    tf.add_to_collection('turns_left', turns_left)
    tf.add_to_collection('my_ships', my_ships)
    tf.add_to_collection('moves', moves)
    tf.add_to_collection('spawn', spawn)

    moves = tf.one_hot(moves, 6)

    # ca = tf.layers.dense(can_afford, size)
    tl = tf.layers.dense(turns_left, size)

    # ca = tf.expand_dims(ca, 1)
    # ca = tf.expand_dims(ca, 1)
    tl = tf.expand_dims(tl, 1)
    tl = tf.expand_dims(tl, 1)

    d_l1_a = tf.layers.conv2d(
        frames, size, 3, activation=tf.nn.relu, padding='same'
    )  # input is frames, number of filters is size, kernel size is 3(x3)
    d_l1_p = tf.nn.max_pool(d_l1_a, max_s, max_s, padding='VALID')  # 16

    d_l2_a = tf.layers.conv2d(d_l1_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l2_p = tf.nn.max_pool(d_l2_a, max_s, max_s, padding='VALID')  # 8

    d_l3_a = tf.layers.conv2d(d_l2_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l3_p = tf.nn.max_pool(d_l3_a, max_s, max_s, padding='VALID')  # 4

    d_l4_a = tf.layers.conv2d(d_l3_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l4_p = tf.nn.max_pool(d_l4_a, max_s, max_s, padding='VALID')  # 2

    d_l5_a = tf.layers.conv2d(d_l4_p,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')
    d_l5_p = tf.nn.max_pool(d_l5_a, max_s, max_s, padding='VALID')  # 1

    final_state = tf.concat([d_l5_p, tl], -1)
    latent = tf.layers.dense(final_state, size, activation=tf.nn.relu)
    # latent = tf.layers.dense(d_l5_p, size, activation=tf.nn.relu)

    u_l5_a = tf.layers.conv2d_transpose(latent,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 2
    u_l5_c = tf.concat([u_l5_a, d_l5_a], -1)
    u_l5_s = tf.layers.conv2d(u_l5_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l4_a = tf.layers.conv2d_transpose(u_l5_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 4
    u_l4_c = tf.concat([u_l4_a, d_l4_a], -1)
    u_l4_s = tf.layers.conv2d(u_l4_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l3_a = tf.layers.conv2d_transpose(u_l4_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 8
    u_l3_c = tf.concat([u_l3_a, d_l3_a], -1)
    u_l3_s = tf.layers.conv2d(u_l3_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l2_a = tf.layers.conv2d_transpose(u_l3_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 16
    u_l2_c = tf.concat([u_l2_a, d_l2_a], -1)
    u_l2_s = tf.layers.conv2d(u_l2_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    u_l1_a = tf.layers.conv2d_transpose(u_l2_s,
                                        size,
                                        3,
                                        2,
                                        activation=tf.nn.relu,
                                        padding='same')  # 32
    u_l1_c = tf.concat([u_l1_a, d_l1_a], -1)
    u_l1_s = tf.layers.conv2d(u_l1_c,
                              size,
                              3,
                              activation=tf.nn.relu,
                              padding='same')

    spawn_logits = tf.layers.dense(latent, 1, activation=None)
    #
    spawn_logits = tf.squeeze(spawn_logits, [1, 2])

    moves_logits = tf.layers.conv2d(u_l1_s,
                                    6,
                                    3,
                                    activation=None,
                                    padding='same')

    tf.add_to_collection('m_logits', moves_logits)
    tf.add_to_collection('s_logits', spawn_logits)

    losses = tf.nn.softmax_cross_entropy_with_logits_v2(labels=moves,
                                                        logits=moves_logits,
                                                        dim=-1)

    losses = tf.expand_dims(losses, -1)

    masked_loss = losses * my_ships

    ships_per_frame = tf.reduce_sum(my_ships, axis=[1, 2])

    frame_loss = tf.reduce_sum(masked_loss, axis=[1, 2])

    average_frame_loss = frame_loss / (ships_per_frame + 1e-8)  # First frames have no ship

    spawn_losses = tf.nn.sigmoid_cross_entropy_with_logits(labels=spawn,
                                                           logits=spawn_logits)

    spawn_losses = tf.reduce_mean(spawn_losses)

    loss = tf.reduce_mean(average_frame_loss) + 0.01 * spawn_losses

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(loss)

    tf.add_to_collection('loss', loss)
    tf.add_to_collection('optimizer', optimizer)

    return
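build_model() returns nothing; every tensor and op is registered with tf.add_to_collection instead. A minimal sketch of pulling them back out for a training step (an assumption about the intended usage; the *_batch arrays are hypothetical NumPy batches):

# Hypothetical training step driven by the collections populated above.
build_model()
frames_t = tf.get_collection('frames')[0]
moves_t = tf.get_collection('moves')[0]
ships_t = tf.get_collection('my_ships')[0]
turns_t = tf.get_collection('turns_left')[0]
spawn_t = tf.get_collection('spawn')[0]
loss_t = tf.get_collection('loss')[0]
train_op = tf.get_collection('optimizer')[0]

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # frames_batch, moves_batch, ships_batch, turns_batch, spawn_batch are assumed inputs.
    _, batch_loss = sess.run([train_op, loss_t],
                             feed_dict={frames_t: frames_batch, moves_t: moves_batch,
                                        ships_t: ships_batch, turns_t: turns_batch,
                                        spawn_t: spawn_batch})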
Example No. 10
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    print(accuracy.eval({x: mnist.test.images, y_: mnist.test.labels}, sess))

# Store variable
_W = W.eval(sess)
_b = b.eval(sess)

sess.close()

# Create new graph for exporting
g_2 = tf.Graph()
with g_2.as_default():
    # Reconstruct graph
    x_2 = tf.placeholder("float", [None, 784], name="input")
    W_2 = tf.constant(_W, name="constant_W")
    b_2 = tf.constant(_b, name="constant_b")
    y_2 = tf.nn.softmax(tf.matmul(x_2, W_2) + b_2, name="output")

    sess_2 = tf.Session()

    init_2 = tf.global_variables_initializer()
    sess_2.run(init_2)

    graph_def = g_2.as_graph_def()

    tf.train.write_graph(graph_def,
                         './tmp/beginner-export',
                         'beginner-graph.pb',
                         as_text=False)
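To sanity-check the export, the binary GraphDef can be read back with import_graph_def. A minimal sketch (an assumption; the path follows the write_graph call above):

# Hypothetical reload of the exported graph.
import tensorflow.compat.v1 as tf
tf.disable_v2_behavior()

graph_def = tf.GraphDef()
with tf.gfile.GFile('./tmp/beginner-export/beginner-graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    x_in = g.get_tensor_by_name('input:0')
    y_out = g.get_tensor_by_name('output:0')
    with tf.Session(graph=g) as sess:
        print(sess.run(y_out, feed_dict={x_in: [[0.0] * 784]}))  # dummy 784-dim input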
Example No. 11
    def __init__(self, nHidden, seqLen):
        self.representation_score = {}
        self.y = tf.placeholder(tf.float32, shape=[None, 1])
        self.extractFeature = ExtractFeature.ExtractFeature()
        self.imageFeature = ImageFeature.ImageFeature()
        newNet = tf.reduce_mean(self.imageFeature.outputLS, axis=0)
        self.textFeature = TextFeature.TextFeature(
            nHidden, seqLen, self.extractFeature.finalState, newNet)
        self.l2_para = 1e-7
        with tf.variable_scope("training_variable"):

            # Attention and fusion weight matrices for the image, text, and attribute branches
            self.weights = {
                "MLP1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 256],
                                        stddev=0.08,
                                        name="MLP1_W")),
                "MLP2":
                tf.Variable(
                    tf.truncated_normal(shape=[256, 1],
                                        stddev=0.08,
                                        name="MLP2_W")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        2 * self.extractFeature.embSize,
                        self.extractFeature.embSize
                    ],
                                        stddev=0.08,
                                        name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(
                    tf.truncated_normal(shape=[self.extractFeature.embSize, 1],
                                        stddev=0.08,
                                        name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.extractFeature.embSize,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize * 2,
                        self.imageFeature.defaultFeatureSize
                    ],
                                        stddev=0.08,
                                        name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 1],
                        stddev=0.08,
                        name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.imageFeature.defaultFeatureSize +
                        self.textFeature.nHidden * 2,
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 2 +
                        self.extractFeature.embSize,
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2)
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        self.textFeature.nHidden * 4,
                        self.textFeature.nHidden * 2
                    ],
                                        stddev=0.08,
                                        name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.imageFeature.defaultFeatureSize / 2 +
                            self.textFeature.nHidden), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(
                    tf.truncated_normal(shape=[
                        int(self.textFeature.nHidden +
                            self.extractFeature.embSize / 2), 1
                    ],
                                        stddev=0.08,
                                        name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.textFeature.nHidden * 2, 1],
                        stddev=0.08,
                        name="ATT_text2_3")),
                "ATT_WI1":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI")),
                "ATT_WT1":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT")),
                "ATT_WA1":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA")),
                "ATT_WI2":
                tf.Variable(
                    tf.truncated_normal(
                        shape=[self.imageFeature.defaultFeatureSize, 512],
                        stddev=0.08,
                        name="ATT_WI2")),
                "ATT_WT2":
                tf.Variable(
                    tf.truncated_normal(shape=[2 * nHidden, 512],
                                        stddev=0.08,
                                        name="ATT_WT2")),
                "ATT_WA2":
                tf.Variable(
                    tf.truncated_normal(shape=[200, 512],
                                        stddev=0.08,
                                        name="ATT_WA2")),
                "ATT_WF_1":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(
                    tf.truncated_normal(shape=[512, 1],
                                        stddev=0.08,
                                        name="ATT_WF_3")),
            }
            # Bias terms matching the attention and fusion weights above
            self.biases = {
                "MLP1":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[256],
                                dtype=tf.float32,
                                name="MLP1_b")),
                "MLP2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[1],
                                dtype=tf.float32,
                                name="MLP2_b")),
                "ATT_attr1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_attr1_1")),
                "ATT_attr1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_attr1_2")),
                "ATT_attr1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.extractFeature.embSize],
                                name="ATT_attr1_3")),
                "ATT_attr2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_1")),
                "ATT_attr2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_2")),
                "ATT_attr2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_attr2_3")),
                "ATT_img1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_image1_1")),
                "ATT_img1_2":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.extractFeature.embSize / 2)
                        ],
                        name="ATT_image1_2")),
                "ATT_img1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.imageFeature.defaultFeatureSize],
                                name="ATT_image1_3")),
                "ATT_img2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_1")),
                "ATT_img2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_2")),
                "ATT_img2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_image2_3")),
                "ATT_text1_1":
                tf.Variable(
                    tf.constant(
                        0.01,
                        shape=[
                            int(self.imageFeature.defaultFeatureSize / 2 +
                                self.textFeature.nHidden)
                        ],
                        name="ATT_text1_1")),
                "ATT_text1_2":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[
                                    int(self.textFeature.nHidden +
                                        self.extractFeature.embSize / 2)
                                ],
                                name="ATT_text1_2")),
                "ATT_text1_3":
                tf.Variable(
                    tf.constant(0.01,
                                shape=[self.textFeature.nHidden * 2],
                                name="ATT_text1_3")),
                "ATT_text2_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_1")),
                "ATT_text2_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_2")),
                "ATT_text2_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_text2_3")),
                "ATT_WW":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WW")),
                "ATT_WI":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI")),
                "ATT_WT":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT")),
                "ATT_WI1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WI1")),
                "ATT_WT1":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WT1")),
                "ATT_WA":
                tf.Variable(tf.constant(0.01, shape=[512], name="ATT_WA")),
                "ATT_WF_1":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_1")),
                "ATT_WF_2":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_2")),
                "ATT_WF_3":
                tf.Variable(tf.constant(0.01, shape=[1], name="ATT_WF_3")),
            }
        print("newnet dimension :", newNet)

        imageVec = self.Attention(newNet, self.imageFeature.outputLS,
                                  self.textFeature.RNNState,
                                  self.extractFeature.finalState, "ATT_img1",
                                  "ATT_img2", 196, True)
        textVec = self.Attention(self.textFeature.RNNState,
                                 self.textFeature.outputs, newNet,
                                 self.extractFeature.finalState, "ATT_text1",
                                 "ATT_text2", self.textFeature.seqLen, False)
        attrVec = self.Attention(self.extractFeature.finalState,
                                 self.extractFeature.inputEmb, newNet,
                                 self.textFeature.RNNState, "ATT_attr1",
                                 "ATT_attr2", 5, False)

        attHidden = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI1"]) +
            self.biases["ATT_WI1"])
        attHidden2 = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT1"]) +
            self.biases["ATT_WT1"])
        attHidden3 = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA1"]) +
            self.biases["ATT_WW"])
        scores1 = tf.matmul(attHidden,
                            self.weights["ATT_WF_1"]) + self.biases["ATT_WF_1"]
        scores2 = tf.matmul(attHidden2,
                            self.weights["ATT_WF_2"]) + self.biases["ATT_WF_2"]
        scores3 = tf.matmul(attHidden3,
                            self.weights["ATT_WF_3"]) + self.biases["ATT_WF_3"]
        scoreLS = [scores1, scores2, scores3]
        scoreLS = tf.nn.softmax(scoreLS, dim=0)
        imageVec = tf.tanh(
            tf.matmul(imageVec, self.weights["ATT_WI2"]) +
            self.biases["ATT_WI"])
        textVec = tf.tanh(
            tf.matmul(textVec, self.weights["ATT_WT2"]) +
            self.biases["ATT_WT"])
        attrVec = tf.tanh(
            tf.matmul(attrVec, self.weights["ATT_WA2"]) +
            self.biases["ATT_WA"])
        self.concatInput = scoreLS[0] * imageVec + scoreLS[
            1] * textVec + scoreLS[2] * attrVec
Example No. 12
def main():
    args = parser.parse_args()
    enc = encoder.get_encoder(CHECKPOINT_DIR, args.model_name)
    hparams = model.default_hparams()
    with open(os.path.join(CHECKPOINT_DIR, args.model_name,
                           'hparams.json')) as f:
        hparams.override_from_dict(json.load(f))

    if args.sample_length > hparams.n_ctx:
        raise ValueError("Can't get samples longer than window size: %s" %
                         hparams.n_ctx)

    if args.model_name == '345M':
        # args.memory_saving_gradients = True
        if args.optimizer == 'adam':
            args.only_train_transformer_layers = True

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    config.graph_options.rewrite_options.layout_optimizer = rewriter_config_pb2.RewriterConfig.OFF
    with tf.Session(config=config) as sess:
        context = tf.placeholder(tf.int32, [args.batch_size, None])
        context_in = randomize(context, hparams, args.noise)
        output = model.model(hparams=hparams, X=context_in)
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=context[:, 1:], logits=output['logits'][:, :-1]))

        if args.val_every > 0:
            val_context = tf.placeholder(tf.int32, [args.val_batch_size, None])
            val_output = model.model(hparams=hparams, X=val_context)
            val_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    labels=val_context[:, 1:],
                    logits=val_output['logits'][:, :-1]))
            val_loss_summary = tf.summary.scalar('val_loss', val_loss)

        tf_sample = sample.sample_sequence(hparams=hparams,
                                           length=args.sample_length,
                                           context=context,
                                           batch_size=args.batch_size,
                                           temperature=1.0,
                                           top_k=args.top_k,
                                           top_p=args.top_p)

        all_vars = [v for v in tf.trainable_variables() if 'model' in v.name]
        train_vars = [v for v in all_vars if '/h' in v.name
                      ] if args.only_train_transformer_layers else all_vars

        if args.optimizer == 'adam':
            opt = tf.train.AdamOptimizer(learning_rate=args.learning_rate)
        elif args.optimizer == 'sgd':
            opt = tf.train.GradientDescentOptimizer(
                learning_rate=args.learning_rate)
        else:
            exit('Bad optimizer:', args.optimizer)

        if args.accumulate_gradients > 1:
            if args.memory_saving_gradients:
                exit(
                    "Memory saving gradients are not implemented for gradient accumulation yet."
                )
            opt = AccumulatingOptimizer(opt=opt, var_list=train_vars)
            opt_reset = opt.reset()
            opt_compute = opt.compute_gradients(loss)
            opt_apply = opt.apply_gradients()
            summary_loss = tf.summary.scalar('loss', opt_apply)
        else:
            if args.memory_saving_gradients:
                opt_grads = memory_saving_gradients.gradients(loss, train_vars)
            else:
                opt_grads = tf.gradients(loss, train_vars)
            opt_grads = list(zip(opt_grads, train_vars))
            opt_apply = opt.apply_gradients(opt_grads)
            summary_loss = tf.summary.scalar('loss', loss)

        summary_lr = tf.summary.scalar('learning_rate', args.learning_rate)
        summaries = tf.summary.merge([summary_lr, summary_loss])

        summary_log = tf.summary.FileWriter(
            os.path.join(CHECKPOINT_DIR, args.run_name))

        saver = tf.train.Saver(var_list=all_vars,
                               max_to_keep=5,
                               keep_checkpoint_every_n_hours=2)
        sess.run(tf.global_variables_initializer())

        if args.restore_from == 'latest':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.run_name))
            if ckpt is None:
                # Get fresh GPT weights if new run.
                ckpt = tf.train.latest_checkpoint(
                    os.path.join(CHECKPOINT_DIR, args.model_name))
        elif args.restore_from == 'fresh':
            ckpt = tf.train.latest_checkpoint(
                os.path.join(CHECKPOINT_DIR, args.model_name))
        else:
            ckpt = tf.train.latest_checkpoint(args.restore_from)
        print('Loading checkpoint', ckpt)
        saver.restore(sess, ckpt)

        print('Loading dataset...')
        chunks = load_dataset(enc,
                              args.dataset,
                              args.combine,
                              encoding=args.encoding)
        data_sampler = Sampler(chunks)
        if args.val_every > 0:
            if args.val_dataset:
                val_chunks = load_dataset(enc,
                                          args.val_dataset,
                                          args.combine,
                                          encoding=args.encoding)
            else:
                val_chunks = chunks
        print('dataset has', data_sampler.total_size, 'tokens')
        print('Training...')

        if args.val_every > 0:
            # Sample from validation set once with fixed seed to make
            # it deterministic during training as well as across runs.
            val_data_sampler = Sampler(val_chunks, seed=1)
            val_batches = [[
                val_data_sampler.sample(1024)
                for _ in range(args.val_batch_size)
            ] for _ in range(args.val_batch_count)]

        counter = 1
        counter_path = os.path.join(CHECKPOINT_DIR, args.run_name, 'counter')
        if os.path.exists(counter_path):
            # Load the step number if we're resuming a run
            # Add 1 so we don't immediately try to save again
            with open(counter_path, 'r') as fp:
                counter = int(fp.read()) + 1

        def save():
            maketree(os.path.join(CHECKPOINT_DIR, args.run_name))
            print(
                'Saving',
                os.path.join(CHECKPOINT_DIR, args.run_name,
                             'model-{}').format(counter))
            saver.save(sess,
                       os.path.join(CHECKPOINT_DIR, args.run_name, 'model'),
                       global_step=counter)
            with open(counter_path, 'w') as fp:
                fp.write(str(counter) + '\n')

        def generate_samples():
            print('Generating samples...')
            context_tokens = data_sampler.sample(1)
            all_text = []
            index = 0
            while index < args.sample_num:
                out = sess.run(
                    tf_sample,
                    feed_dict={context: args.batch_size * [context_tokens]})
                for i in range(min(args.sample_num - index, args.batch_size)):
                    text = enc.decode(out[i])
                    text = '======== SAMPLE {} ========\n{}\n'.format(
                        index + 1, text)
                    all_text.append(text)
                    index += 1
            print(text)
            maketree(os.path.join(SAMPLE_DIR, args.run_name))
            with open(os.path.join(SAMPLE_DIR, args.run_name,
                                   'samples-{}').format(counter),
                      'w',
                      encoding=args.encoding) as fp:
                fp.write('\n'.join(all_text))

        def validation():
            print('Calculating validation loss...')
            losses = []
            for batch in tqdm.tqdm(val_batches):
                losses.append(
                    sess.run(val_loss, feed_dict={val_context: batch}))
            v_val_loss = np.mean(losses)
            v_summary = sess.run(val_loss_summary,
                                 feed_dict={val_loss: v_val_loss})
            summary_log.add_summary(v_summary, counter)
            summary_log.flush()
            print('[{counter} | {time:2.2f}] validation loss = {loss:2.2f}'.
                  format(counter=counter,
                         time=time.time() - start_time,
                         loss=v_val_loss))

        def sample_batch():
            return [data_sampler.sample(1024) for _ in range(args.batch_size)]

        avg_loss = (0.0, 0.0)
        start_time = time.time()

        try:
            while counter < 1000:
                if counter % args.save_every == 0:
                    save()
                if counter % args.sample_every == 0:
                    generate_samples()
                if args.val_every > 0 and (counter % args.val_every == 0
                                           or counter == 1):
                    validation()

                if args.accumulate_gradients > 1:
                    sess.run(opt_reset)
                    for _ in range(args.accumulate_gradients):
                        sess.run(opt_compute,
                                 feed_dict={context: sample_batch()})
                    (v_loss, v_summary) = sess.run((opt_apply, summaries))
                else:
                    (_, v_loss, v_summary) = sess.run(
                        (opt_apply, loss, summaries),
                        feed_dict={context: sample_batch()})

                summary_log.add_summary(v_summary, counter)

                avg_loss = (avg_loss[0] * 0.99 + v_loss,
                            avg_loss[1] * 0.99 + 1.0)

                print(
                    '[{counter} | {time:2.2f}] loss={loss:2.2f} avg={avg:2.2f}'
                    .format(counter=counter,
                            time=time.time() - start_time,
                            loss=v_loss,
                            avg=avg_loss[0] / avg_loss[1]))

                counter += 1
        except KeyboardInterrupt:
            print('interrupted')
            save()
Example No. 13
def gain(data_x, gain_parameters):
    '''Impute missing values in data_x
  
  Args:
    - data_x: original data with missing values
    - gain_parameters: GAIN network parameters:
      - batch_size: Batch size
      - hint_rate: Hint rate
      - alpha: Hyperparameter
      - iterations: Iterations
      
  Returns:
    - imputed_data: imputed data
  '''
    # Define mask matrix
    data_m = 1 - np.isnan(data_x)

    # System parameters
    batch_size = gain_parameters['batch_size']
    hint_rate = gain_parameters['hint_rate']
    alpha = gain_parameters['alpha']
    iterations = gain_parameters['iterations']

    # Other parameters
    no, dim = data_x.shape

    # Hidden state dimensions
    h_dim = int(dim)

    # Normalization
    norm_data, norm_parameters = normalization(data_x)
    norm_data_x = np.nan_to_num(norm_data, 0)

    ## GAIN architecture
    # Input placeholders
    # Data vector
    tf.disable_v2_behavior()
    X = tf.placeholder(tf.float32, shape=[None, dim])
    # Mask vector
    M = tf.placeholder(tf.float32, shape=[None, dim])
    # Hint vector
    H = tf.placeholder(tf.float32, shape=[None, dim])

    # Discriminator variables
    D_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))  # Data + Hint as inputs
    D_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    D_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    D_W3 = tf.Variable(xavier_init([h_dim, dim]))
    D_b3 = tf.Variable(tf.zeros(shape=[dim]))  # Multi-variate outputs

    theta_D = [D_W1, D_W2, D_W3, D_b1, D_b2, D_b3]

    #Generator variables
    # Data + Mask as inputs (Random noise is in missing components)
    G_W1 = tf.Variable(xavier_init([dim * 2, h_dim]))
    G_b1 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W2 = tf.Variable(xavier_init([h_dim, h_dim]))
    G_b2 = tf.Variable(tf.zeros(shape=[h_dim]))

    G_W3 = tf.Variable(xavier_init([h_dim, dim]))
    G_b3 = tf.Variable(tf.zeros(shape=[dim]))

    theta_G = [G_W1, G_W2, G_W3, G_b1, G_b2, G_b3]

    ## GAIN functions
    # Generator
    def generator(x, m):
        # Concatenate Mask and Data
        inputs = tf.concat(values=[x, m], axis=1)
        G_h1 = tf.nn.relu(tf.matmul(inputs, G_W1) + G_b1)
        G_h2 = tf.nn.relu(tf.matmul(G_h1, G_W2) + G_b2)
        # MinMax normalized output
        G_prob = tf.nn.sigmoid(tf.matmul(G_h2, G_W3) + G_b3)
        return G_prob

    # Discriminator
    def discriminator(x, h):
        # Concatenate Data and Hint
        inputs = tf.concat(values=[x, h], axis=1)
        D_h1 = tf.nn.relu(tf.matmul(inputs, D_W1) + D_b1)
        D_h2 = tf.nn.relu(tf.matmul(D_h1, D_W2) + D_b2)
        D_logit = tf.matmul(D_h2, D_W3) + D_b3
        D_prob = tf.nn.sigmoid(D_logit)
        return D_prob

    ## GAIN structure
    # Generator
    G_sample = generator(X, M)

    # Combine with observed data
    Hat_X = X * M + G_sample * (1 - M)

    # Discriminator
    D_prob = discriminator(Hat_X, H)

    ## GAIN loss
    D_loss_temp = -tf.reduce_mean(M * tf.log(D_prob + 1e-8) \
                                  + (1-M) * tf.log(1. - D_prob + 1e-8))

    G_loss_temp = -tf.reduce_mean((1 - M) * tf.log(D_prob + 1e-8))

    MSE_loss = tf.reduce_mean((M * X - M * G_sample) ** 2) / tf.reduce_mean(M)

    D_loss = D_loss_temp
    G_loss = G_loss_temp + alpha * MSE_loss

    ## GAIN solver
    D_solver = tf.train.AdamOptimizer().minimize(D_loss, var_list=theta_D)
    G_solver = tf.train.AdamOptimizer().minimize(G_loss, var_list=theta_G)

    ## Iterations
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())

    # Start Iterations
    for it in tqdm(range(iterations)):

        # Sample batch
        batch_idx = sample_batch_index(no, batch_size)
        X_mb = norm_data_x[batch_idx, :]
        M_mb = data_m[batch_idx, :]
        # Sample random vectors
        Z_mb = uniform_sampler(0, 0.01, batch_size, dim)
        # Sample hint vectors
        H_mb_temp = binary_sampler(hint_rate, batch_size, dim)
        H_mb = M_mb * H_mb_temp

        # Combine random vectors with observed vectors
        X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

        _, D_loss_curr = sess.run([D_solver, D_loss_temp],
                                  feed_dict={
                                      M: M_mb,
                                      X: X_mb,
                                      H: H_mb
                                  })
        _, G_loss_curr, MSE_loss_curr = \
        sess.run([G_solver, G_loss_temp, MSE_loss],
                 feed_dict = {X: X_mb, M: M_mb, H: H_mb})

    ## Return imputed data
    Z_mb = uniform_sampler(0, 0.01, no, dim)
    M_mb = data_m
    X_mb = norm_data_x
    X_mb = M_mb * X_mb + (1 - M_mb) * Z_mb

    imputed_data = sess.run([G_sample], feed_dict={X: X_mb, M: M_mb})[0]

    imputed_data = data_m * norm_data_x + (1 - data_m) * imputed_data

    # Renormalization
    imputed_data = renormalization(imputed_data, norm_parameters)

    # Rounding
    imputed_data = rounding(imputed_data, data_x)

    return imputed_data
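A short usage sketch following the docstring (normalization, renormalization, rounding, and the samplers are assumed to come from the surrounding GAIN utilities; the parameter values below are illustrative assumptions):

# Hypothetical call: impute a matrix with roughly 20% missing entries.
import numpy as np

data_x = np.random.rand(1000, 10)
data_x[np.random.rand(1000, 10) < 0.2] = np.nan

gain_parameters = {'batch_size': 128,
                   'hint_rate': 0.9,
                   'alpha': 100,
                   'iterations': 10000}
imputed_data = gain(data_x, gain_parameters)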
Example No. 14
# Create a simple TF Graph 
# By Omid Alemi - Jan 2017
# Works with TF r1.0

# import tensorflow as tf
import tensorflow._api.v2.compat.v1 as tf
tf.disable_v2_behavior()

I = tf.placeholder(tf.float32, shape=[None,3], name='I') # input
W = tf.Variable(tf.zeros(shape=[3,2]), dtype=tf.float32, name='W') # weights
b = tf.Variable(tf.zeros(shape=[2]), dtype=tf.float32, name='b') # biases
O = tf.nn.relu(tf.matmul(I, W) + b, name='O') # activation / output

saver = tf.train.Saver()
init_op = tf.global_variables_initializer()

with tf.Session() as sess:
  sess.run(init_op)
  
  # save the graph
  tf.train.write_graph(sess.graph_def, '.', 'tfdroid.pbtxt')  

  # normally you would do some training here
  # but for now we will just assign something to W
  sess.run(tf.assign(W, [[1, 2],[4,5],[7,8]]))
  sess.run(tf.assign(b, [1,1]))

  #save a checkpoint file, which will store the above assignment  
  saver.save(sess, 'tfdroid.ckpt')
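To verify the checkpoint, it can be restored in a fresh graph from the .meta file that saver.save writes alongside it. A minimal sketch (an assumption; file names follow the save call above):

# Hypothetical restore of the saved checkpoint in a new process.
import tensorflow._api.v2.compat.v1 as tf
tf.disable_v2_behavior()

with tf.Session() as sess:
    restorer = tf.train.import_meta_graph('tfdroid.ckpt.meta')
    restorer.restore(sess, 'tfdroid.ckpt')
    W_restored = sess.graph.get_tensor_by_name('W:0')
    print(sess.run(W_restored))  # should print the values assigned before saving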
  
Example No. 15
    def __init__(self,
                 input_width=227,
                 input_height=227,
                 input_channels=3,
                 num_classes=1000,
                 learning_rate=0.01,
                 momentum=0.9,
                 keep_prob=0.5):

        # From article: The learning rate was initialized at 0.01.
        # From article: We trained our models using stochastic gradient descent with a batch size of 128 examples,
        # momentum of 0.9, and weight decay of 0.0005

        # From article: We initialized the weights in each layer from a zero-mean Gaussian distribution with standard
        # deviation 0.01.

        self.input_width = input_width
        self.input_height = input_height
        self.input_channels = input_channels
        self.num_classes = num_classes
        self.learning_rate = learning_rate
        self.momentum = momentum
        self.keep_prob = keep_prob

        self.random_mean = 0
        self.random_stddev = 0.01

        # ----------------------------------------------------------------------------------------------------

        # From article: We initialized the neuron biases in the second, fourth, and fifth convolutional layers, as well
        # as in the fully-connected hidden layers, with the constant 1. ... We initialized the neuron biases in the
        # remaining layers with the constant 0.

        # Input: 227x227x3.
        with tf.name_scope('input'):
            self.X = tf.placeholder(dtype=tf.float32,
                                    shape=[
                                        None, self.input_height,
                                        self.input_width, self.input_channels
                                    ],
                                    name='X')

        # Labels: 1000.
        with tf.name_scope('labels'):
            self.Y = tf.placeholder(dtype=tf.float32,
                                    shape=[None, self.num_classes],
                                    name='Y')

        # Dropout keep prob.
        with tf.name_scope('dropout'):
            self.dropout_keep_prob = tf.placeholder(dtype=tf.float32,
                                                    shape=(),
                                                    name='dropout_keep_prob')

        # Layer 1.
        # [Input] ==> 227x227x3
        # --> 227x227x3 ==> [Convolution: size=(11x11x3)x96, strides=4, padding=valid] ==> 55x55x96
        # --> 55x55x96 ==> [ReLU] ==> 55x55x96
        # --> 55x55x96 ==> [Local Response Normalization] ==> 55x55x96
        # --> 55x55x96 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 27x27x96
        # --> [Output] ==> 27x27x96
        # Note: 48*2=96, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer1'):
            layer1_activations = self.__conv(
                input=self.X,
                filter_width=11,
                filter_height=11,
                filters_count=96,
                stride_x=4,
                stride_y=4,
                padding='VALID',
                init_biases_with_the_constant_1=False)
            layer1_lrn = self.__local_response_normalization(
                input=layer1_activations)
            layer1_pool = self.__max_pool(input=layer1_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 2.
        # [Input] ==> 27x27x96
        # --> 27x27x96 ==> [Convolution: size=(5x5x96)x256, strides=1, padding=same] ==> 27x27x256
        # --> 27x27x256 ==> [ReLU] ==> 27x27x256
        # --> 27x27x256 ==> [Local Response Normalization] ==> 27x27x256
        # --> 27x27x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 13x13x256
        # --> [Output] ==> 13x13x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer2'):
            layer2_activations = self.__conv(
                input=layer1_pool,
                filter_width=5,
                filter_height=5,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer2_lrn = self.__local_response_normalization(
                input=layer2_activations)
            layer2_pool = self.__max_pool(input=layer2_lrn,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 3.
        # [Input] ==> 13x13x256
        # --> 13x13x256 ==> [Convolution: size=(3x3x256)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer3'):
            layer3_activations = self.__conv(
                input=layer2_pool,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=False)

        # Layer 4.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x384, strides=1, padding=same] ==> 13x13x384
        # --> 13x13x384 ==> [ReLU] ==> 13x13x384
        # --> [Output] ==> 13x13x384
        # Note: 192*2=384, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer4'):
            layer4_activations = self.__conv(
                input=layer3_activations,
                filter_width=3,
                filter_height=3,
                filters_count=384,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)

        # Layer 5.
        # [Input] ==> 13x13x384
        # --> 13x13x384 ==> [Convolution: size=(3x3x384)x256, strides=1, padding=same] ==> 13x13x256
        # --> 13x13x256 ==> [ReLU] ==> 13x13x256
        # --> 13x13x256 ==> [Max-Pool: size=3x3, strides=2, padding=valid] ==> 6x6x256
        # --> [Output] ==> 6x6x256
        # Note: 128*2=256, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer5'):
            layer5_activations = self.__conv(
                input=layer4_activations,
                filter_width=3,
                filter_height=3,
                filters_count=256,
                stride_x=1,
                stride_y=1,
                padding='SAME',
                init_biases_with_the_constant_1=True)
            layer5_pool = self.__max_pool(input=layer5_activations,
                                          filter_width=3,
                                          filter_height=3,
                                          stride_x=2,
                                          stride_y=2,
                                          padding='VALID')

        # Layer 6.
        # [Input] ==> 6x6x256=9216
        # --> 9216 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer6'):
            pool5_shape = layer5_pool.get_shape().as_list()
            flattened_input_size = (pool5_shape[1] * pool5_shape[2] *
                                    pool5_shape[3])
            layer6_fc = self.__fully_connected(
                input=tf.reshape(layer5_pool, shape=[-1,
                                                     flattened_input_size]),
                inputs_count=flattened_input_size,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer6_dropout = self.__dropout(input=layer6_fc)

        # Layer 7.
        # [Input] ==> 4096
        # --> 4096 ==> [Fully Connected: neurons=4096] ==> 4096
        # --> 4096 ==> [ReLU] ==> 4096
        # --> 4096 ==> [Dropout] ==> 4096
        # --> [Output] ==> 4096
        # Note: 2048*2=4096, One GPU runs the layer-parts at the top while the other runs the layer-parts at the bottom.
        with tf.name_scope('layer7'):
            layer7_fc = self.__fully_connected(
                input=layer6_dropout,
                inputs_count=4096,
                outputs_count=4096,
                relu=True,
                init_biases_with_the_constant_1=True)
            layer7_dropout = self.__dropout(input=layer7_fc)

        # Layer 8.
        # [Input] ==> 4096
        # --> 4096 ==> [Logits: neurons=1000] ==> 1000
        # --> [Output] ==> 1000
        with tf.name_scope('layer8'):
            layer8_logits = self.__fully_connected(
                input=layer7_dropout,
                inputs_count=4096,
                outputs_count=self.num_classes,
                relu=False,
                name='logits')

        # Cross Entropy.
        with tf.name_scope('cross_entropy'):
            cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
                logits=layer8_logits, labels=self.Y, name='cross_entropy')
            self.__variable_summaries(cross_entropy)

        # Training.
        with tf.name_scope('training'):
            loss_operation = tf.reduce_mean(cross_entropy,
                                            name='loss_operation')
            tf.summary.scalar(name='loss', tensor=loss_operation)

            optimizer = tf.train.MomentumOptimizer(
                learning_rate=self.learning_rate, momentum=self.momentum)

            # self.training_operation = optimizer.minimize(loss_operation, name='training_operation')

            grads_and_vars = optimizer.compute_gradients(loss_operation)
            self.training_operation = optimizer.apply_gradients(
                grads_and_vars, name='training_operation')

            for grad, var in grads_and_vars:
                if grad is not None:
                    with tf.name_scope(var.op.name + '/gradients'):
                        self.__variable_summaries(grad)

        # Accuracy.
        with tf.name_scope('accuracy'):
            correct_prediction = tf.equal(tf.argmax(layer8_logits, 1),
                                          tf.argmax(self.Y, 1),
                                          name='correct_prediction')
            self.accuracy_operation = tf.reduce_mean(tf.cast(
                correct_prediction, tf.float32),
                                                     name='accuracy_operation')
            tf.summary.scalar(name='accuracy', tensor=self.accuracy_operation)
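
    # A sketch (an assumption, not part of this excerpt) of one of the private
    # helpers the constructor calls. Following the article comments above, the
    # weights come from a zero-mean Gaussian with stddev 0.01 and the biases
    # are initialized to 0 or 1.
    def __conv(self, input, filter_width, filter_height, filters_count,
               stride_x, stride_y, padding,
               init_biases_with_the_constant_1=False):
        input_channels = input.get_shape().as_list()[-1]
        weights = tf.Variable(
            tf.truncated_normal(
                shape=[filter_height, filter_width, input_channels,
                       filters_count],
                mean=self.random_mean,
                stddev=self.random_stddev),
            name='weights')
        biases = tf.Variable(
            tf.constant(1.0 if init_biases_with_the_constant_1 else 0.0,
                        shape=[filters_count]),
            name='biases')
        conv = tf.nn.conv2d(input,
                            weights,
                            strides=[1, stride_y, stride_x, 1],
                            padding=padding)
        return tf.nn.relu(conv + biases)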
Exemplo n.º 16
0
def main(trainModel=True,
         buildConfusionMatrix=True,
         restore=False,
         buildClassifiedMatrix=True):

    tf.disable_v2_behavior()

    input_images = tf.placeholder(tf.float32, [None, 28, 28], name="Input")
    real = tf.placeholder(tf.float32, [None, CLASSES], name="real_classes")

    layer1 = create_conv_layer(tf.reshape(input_images, [-1, 28, 28, 1]),
                               1,
                               28, [5, 5], [2, 2],
                               name="conv_no_pool")
    layer2 = create_conv_layer(layer1,
                               28,
                               56, [5, 5], [2, 2],
                               name='conv_with_pool')
    conv_result = tf.reshape(layer2, [-1, 7 * 7 * 56])

    relu_layer_weight = tf.Variable(tf.truncated_normal([7 * 7 * 56, 1000],
                                                        stddev=STDDEV * 2),
                                    name='relu_layer_weight')
    relu_layer_bias = tf.Variable(tf.truncated_normal([1000],
                                                      stddev=STDDEV / 2),
                                  name='relu_layer_bias')
    relu_layer = tf.matmul(conv_result, relu_layer_weight) + relu_layer_bias
    relu_layer = tf.nn.relu(relu_layer)
    relu_layer = tf.nn.dropout(relu_layer, DROPOUT)

    final_layer_weight = tf.Variable(tf.truncated_normal([1000, CLASSES],
                                                         stddev=STDDEV * 2),
                                     name='final_layer_weight')
    final_layer_bias = tf.Variable(tf.truncated_normal([CLASSES],
                                                       stddev=STDDEV / 2),
                                   name='final_layer_bias')
    final_layer = tf.matmul(relu_layer, final_layer_weight) + final_layer_bias

    predicts = tf.nn.softmax(final_layer)
    predicts_for_log = tf.clip_by_value(predicts, 1e-9, 0.999999999)

    #crossEntropy = -tf.reduce_mean(tf.reduce_sum(y * tf.log(y_clipped) + (1 - y) * tf.log(1 - y_clipped), axis=1))

    loss = -tf.reduce_mean(
        tf.reduce_sum(real * tf.log(predicts_for_log) +
                      (1 - real) * tf.log(1 - predicts_for_log),
                      axis=1),
        axis=0)
    #test = tf.reduce_sum(real * tf.log(predicts_for_log) + (1 - real) * tf.log(1 - predicts_for_log), axis=1)
    #loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=final_layer, labels=real))
    optimiser = tf.train.GradientDescentOptimizer(
        learning_rate=LEARNING_RATE).minimize(loss)

    correct_prediction = tf.equal(tf.argmax(real, axis=1),
                                  tf.argmax(predicts, axis=1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    confusion_matrix = tf.confusion_matrix(labels=tf.argmax(real, axis=1),
                                           predictions=tf.argmax(predicts,
                                                                 axis=1),
                                           num_classes=CLASSES)

    saver = tf.train.Saver()

    # dataset = get_mnist_dataset()
    dataset = get_fashion_dataset()

    with tf.Session() as session:

        session.run(tf.global_variables_initializer())

        if restore:
            saver.restore(session, SAVE_PATH)

        if trainModel:
            train(input_images, real, session, optimiser, loss, accuracy,
                  saver, dataset)

        if buildConfusionMatrix:
            test_cm = session.run(confusion_matrix,
                                  feed_dict={
                                      input_images: dataset.test_x,
                                      real: dataset.test_y
                                  })
            draw_confusion_matrix(test_cm)

        if buildClassifiedMatrix:
            all_probs = session.run(predicts,
                                    feed_dict={
                                        input_images: dataset.test_x,
                                        real: dataset.test_y
                                    })
            max_failure_picture_index = [[(-1, -1.0)] * CLASSES
                                         for _ in range(CLASSES)]
            for i in range(len(all_probs)):
                # real_class avoids shadowing the `real` placeholder above.
                real_class = np.argmax(dataset.test_y[i])
                for j in range(CLASSES):
                    if max_failure_picture_index[real_class][j][1] < all_probs[i][j]:
                        max_failure_picture_index[real_class][j] = (i, all_probs[i][j])
            draw_max_failure_pictures(dataset.test_x,
                                      max_failure_picture_index)
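
# A sketch (an assumption, not shown in this excerpt) of the create_conv_layer
# helper used by main(): a 5x5 convolution with ReLU followed by 2x2 max
# pooling, which matches the 28 -> 14 -> 7 spatial reduction implied by
# tf.reshape(layer2, [-1, 7 * 7 * 56]) above. STDDEV is assumed to be the same
# module-level constant used for the dense layers.
def create_conv_layer(input_data, num_input_channels, num_filters,
                      filter_shape, pool_shape, name):
    conv_filter_shape = [filter_shape[0], filter_shape[1],
                         num_input_channels, num_filters]
    weights = tf.Variable(tf.truncated_normal(conv_filter_shape, stddev=STDDEV),
                          name=name + '_W')
    bias = tf.Variable(tf.truncated_normal([num_filters], stddev=STDDEV),
                       name=name + '_b')
    out_layer = tf.nn.conv2d(input_data, weights, strides=[1, 1, 1, 1],
                             padding='SAME') + bias
    out_layer = tf.nn.relu(out_layer)
    out_layer = tf.nn.max_pool(out_layer,
                               ksize=[1, pool_shape[0], pool_shape[1], 1],
                               strides=[1, pool_shape[0], pool_shape[1], 1],
                               padding='SAME')
    return out_layer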
Exemplo n.º 17
0
tf.disable_v2_behavior()

model_name = sys.argv[1]
input_count = int(sys.argv[2])
value_output_count = int(sys.argv[3])
action_output_count = int(sys.argv[4])
path_to_store = sys.argv[5]
seed = int(sys.argv[6])

print("INITIALIZING TF MODEL WITH SEED " + str(seed))

tf.reset_default_graph()
tf.random.set_random_seed(seed)

x = tf.placeholder(tf.float64, [None, 2], name='input_node')
target = tf.placeholder(tf.float64, [None, 1], name="target_node")

prediction = tf.identity(x, name="prediction_node")

init = tf.global_variables_initializer()

sess = tf.Session()
sess.run(init)
train_writer = tf.summary.FileWriter(path_to_store + "/summary", sess.graph)
train_writer.close()

with open(os.path.join(path_to_store, model_name + '.pb'), 'wb') as f:
    f.write(tf.get_default_graph().as_graph_def().SerializeToString())

# builder = tf.saved_model.builder.SavedModelBuilder("C:/Users/Snurka/init_model")
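
# A sketch (not part of the original) of how the exported .pb could be loaded
# back and sanity-checked; the node names match the placeholders defined above.
with tf.gfile.GFile(os.path.join(path_to_store, model_name + '.pb'), 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
with tf.Graph().as_default() as loaded_graph:
    tf.import_graph_def(graph_def, name='')
    x_in = loaded_graph.get_tensor_by_name('input_node:0')
    pred = loaded_graph.get_tensor_by_name('prediction_node:0')
    with tf.Session(graph=loaded_graph) as check_sess:
        print(check_sess.run(pred, feed_dict={x_in: [[1.0, 2.0]]}))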
import tensorflow._api.v2.compat.v1 as tf
import numpy as np
tf.reset_default_graph()
tf.compat.v1.disable_eager_execution()
tf.compat.v1.disable_v2_behavior()
# No init run is needed here: the variables are restored from the checkpoint below.
x = tf.placeholder(tf.float32, shape=[None, 4])
y = tf.placeholder(tf.float32, shape=[None, 1])
w = tf.Variable(tf.random_normal([4, 1]), name="weight")
b = tf.Variable(tf.random_normal([1]), name="bias")
hypo = tf.matmul(x, w) + b
saver = tf.train.Saver()
test_arr = [[12, 6.5, 15.7, 10.8]]
sess2 = tf.Session()
saver.restore(sess2, "./saved.ckpt")
predict = sess2.run(hypo, feed_dict={x: test_arr})
print(predict[0])
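
# A small sketch (not in the original) for inspecting what the restored
# checkpoint actually holds, using the inspect_checkpoint utility shipped
# with TF 1.x / tf.compat.v1.
from tensorflow.python.tools import inspect_checkpoint as chkp
chkp.print_tensors_in_checkpoint_file("./saved.ckpt",
                                      tensor_name='',
                                      all_tensors=True)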
Exemplo n.º 19
0
                name="f1_biases"),
    "f2_biases":
    tf.Variable(tf.truncated_normal(fc_connection_shapes["f2_shape"][3]),
                name="f2_biases"),
    "f3_biases":
    tf.Variable(tf.truncated_normal(fc_connection_shapes["f3_shape"][3]),
                name="f3_biases")
}

dataset_dict["total_image_size"] = dataset_dict["image_size"] * dataset_dict[
    "image_size"]

# Declare the input and output placeholders
input_img = tf.placeholder(tf.float32,
                           shape=[
                               BATCH_SIZE, dataset_dict["image_size"],
                               dataset_dict["image_size"],
                               dataset_dict["num_channels"]
                           ])
img_4d_shaped = tf.reshape(input_img, [
    -1, dataset_dict["image_size"], dataset_dict["image_size"],
    dataset_dict["num_channels"]
])
labels = tf.placeholder(tf.float32, shape=[None, dataset_dict["num_labels"]])

# Convolution Layer 1 | Response Normalization | Max Pooling | ReLU
c_layer_1 = tf.nn.conv2d(img_4d_shaped,
                         conv_weights["c1_weights"],
                         strides=[1, 4, 4, 1],
                         padding="SAME",
                         name="c_layer_1")
c_layer_1 += conv_biases["c1_biases"]
Q_output_count = value_output_count

output_count = Q_output_count + action_output_count

tf.reset_default_graph()
tf.random.set_random_seed(seed)

Relu = tf.nn.relu

Tanh = tf.nn.tanh
BatchNormalization = tf.layers.batch_normalization
Dense = tf.layers.dense
Dropout = tf.nn.dropout


x = tf.placeholder(tf.float64, [None, input_count], name='input_node')
target = tf.placeholder(tf.float64, [None, output_count], name="target_node")
keep_prob = tf.placeholder(tf.float64, [], name="keep_prob_node")
learning_rate = tf.placeholder(tf.float64, [], name="learning_rate_node")

Q_target = tf.slice(target, [0, 0], [-1, Q_output_count], name="Q_slice_node")
action_target = tf.slice(target, [0, Q_output_count],
                         [-1, action_output_count], name="action_slice_node")

hidden_1 = Dense(x, hidden_count_1, tf.nn.relu, use_bias=True,
                 kernel_initializer=tf.glorot_normal_initializer(),
                 name="hidden_1")

Q_output_raw = tf.layers.dense(hidden_1, Q_output_count, use_bias=True,
                               kernel_initializer=tf.zeros_initializer,
                               bias_initializer=tf.zeros_initializer,
                               name='Q_output_node')
# Round the Q head to five decimal places.
Q_output_ = tf.multiply(Q_output_raw, 100000)
Q_output_ = tf.round(Q_output_)
Q_output = tf.div(Q_output_, 100000)

action_output_raw = tf.layers.dense(hidden_1, action_output_count,
                                    tf.nn.softmax, use_bias=True,
                                    kernel_initializer=tf.zeros_initializer,
                                    bias_initializer=tf.zeros_initializer,
                                    name="action_output_node")
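
# A sketch (an assumption, not shown in this excerpt) of how the two heads
# above could be trained against the sliced targets: squared error for the Q
# head plus cross-entropy for the softmax action head, optimized with the
# learning_rate placeholder defined earlier. The equal loss weighting is
# illustrative.
Q_loss = tf.reduce_mean((Q_output_raw - Q_target) ** 2, name="Q_loss_node")
action_loss = -tf.reduce_mean(
    tf.reduce_sum(action_target * tf.log(action_output_raw + 1e-8), axis=1))
total_loss = tf.add(Q_loss, action_loss, name="loss_node")
train_op = tf.train.AdamOptimizer(learning_rate).minimize(total_loss,
                                                          name="train_node")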
Exemplo n.º 21
0
def conv2d(x, W):
    return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')


def max_pool_2x2(x):
    return tf.nn.max_pool(x,
                          ksize=[1, 2, 2, 1],
                          strides=[1, 2, 2, 1],
                          padding='SAME')


mnist = input_data.read_data_sets("data/", one_hot=True)

g = tf.Graph()
with g.as_default():
    x = tf.placeholder("float", shape=[None, 784])
    y_ = tf.placeholder("float", shape=[None, 10])

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    x_image = tf.reshape(x, [-1, 28, 28, 1])
    h_conv1 = tf.nn.relu(conv2d(x_image, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_fc1 = weight_variable([7 * 7 * 64, 1024])
    b_fc1 = bias_variable([1024])