def inference(input_placeholder):
    with tf.name_scope("inference") as scope:
        W = tf.Variable(tf.zeros([3, 3]), name="weight")
        b = tf.Variable(tf.zeros([3]), name="bias")
        y = tf.nn.softmax(tf.matmul(input_placeholder, W) + b)
    return y
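# Usage sketch (hypothetical, not from the source): the fixed [3, 3] weight
# matrix above implies a 3-feature input. Placeholder and session names here
# are illustrative; the initializer call follows the TF 1.x style used below.
x = tf.placeholder(tf.float32, [None, 3], name="x")
y = inference(x)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    probs = sess.run(y, feed_dict={x: [[1.0, 2.0, 3.0]]})  # one row of 3 class probabilities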
def __init__(self, input_size, num_hidden, minibatch_size, name='lstmcell'):
    """
    Constructs an LSTM Cell

    Parameters:
    ----------
    input_size: int
        the size of a single input vector to the cell
    num_hidden: int
        the number of hidden nodes in the cell
    minibatch_size: int
        the number of input vectors in the input matrix
    """
    LSTMCell.created_count += 1

    self.id = name + '_' + str(LSTMCell.created_count) if name == 'lstmcell' else name

    self.input_size = input_size
    self.num_hidden = num_hidden
    self.minibatch_size = minibatch_size

    self.input_weights = tf.Variable(
        tf.truncated_normal([self.input_size, self.num_hidden * 4], -0.1, 0.1),
        name=self.id + '_wi')
    self.output_weights = tf.Variable(
        tf.truncated_normal([self.num_hidden, self.num_hidden * 4], -0.1, 0.1),
        name=self.id + '_wo')
    self.bias = tf.Variable(tf.zeros([self.num_hidden * 4]), name=self.id + '_b')

    self.prev_output = tf.Variable(
        tf.zeros([self.minibatch_size, self.num_hidden]),
        trainable=False, name=self.id + '_o')
    self.prev_state = tf.Variable(
        tf.zeros([self.minibatch_size, self.num_hidden]),
        trainable=False, name=self.id + '_s')
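# Usage sketch (hypothetical; assumes the enclosing class is this LSTMCell and
# that LSTMCell.created_count is a class attribute initialized to 0 elsewhere
# in the source). Sizes are illustrative.
cell_a = LSTMCell(input_size=128, num_hidden=256, minibatch_size=32)
cell_b = LSTMCell(input_size=128, num_hidden=256, minibatch_size=32, name='decoder_cell')
print(cell_a.id)  # auto-numbered, e.g. 'lstmcell_1'
print(cell_b.id)  # explicit names are kept as-is: 'decoder_cell'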
def testGradient(self):
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        batch_size = 1
        cell_size = 3
        input_size = 2

        # Inputs
        x = tf.zeros([batch_size, input_size])
        h = tf.zeros([batch_size, cell_size])
        output = gru_ops.GRUBlockCell(cell_size)(x, h)
        sess.run([tf.initialize_all_variables()])

        all_variables = tf.all_variables()
        [w_ru, b_ru, w_c, b_c] = all_variables[:4]

        error_x = tf.test.compute_gradient_error(
            x, (batch_size, input_size), output[0], (batch_size, cell_size))
        error_h = tf.test.compute_gradient_error(
            h, (batch_size, cell_size), output[0], (batch_size, cell_size))
        error_w_ru = tf.test.compute_gradient_error(
            w_ru, (input_size + cell_size, 2 * cell_size),
            output[0], (batch_size, cell_size))
        error_w_c = tf.test.compute_gradient_error(
            w_c, (input_size + cell_size, cell_size),
            output[0], (batch_size, cell_size))
        error_b_ru = tf.test.compute_gradient_error(
            b_ru, (2 * cell_size,), output[0], (batch_size, cell_size))
        error_b_c = tf.test.compute_gradient_error(
            b_c, (cell_size,), output[0], (batch_size, cell_size))

        eps = 1e-4
        self.assertLess(error_x, eps)
        self.assertLess(error_h, eps)
        self.assertLess(error_w_ru, eps)
        self.assertLess(error_w_c, eps)
        self.assertLess(error_b_ru, eps)
        self.assertLess(error_b_c, eps)
def make_variable_dict(max_age, max_gender):
    # TODO(sibyl-toe9oF2e): Figure out how to derive max_age & max_gender from
    # examples_dict.
    age_weights = tf.Variable(tf.zeros([max_age + 1], dtype=tf.float32))
    gender_weights = tf.Variable(tf.zeros([max_gender + 1], dtype=tf.float32))
    return dict(sparse_features_weights=[age_weights, gender_weights],
                dense_features_weights=[])
def testBlockGRUToGRUCellSingleStep(self):
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        batch_size = 4
        cell_size = 5
        input_size = 6
        seed = 1994

        initializer = tf.random_uniform_initializer(-0.01, 0.01, seed=seed)

        # Inputs
        x = tf.zeros([batch_size, input_size])
        h = tf.zeros([batch_size, cell_size])

        # Values for the inputs.
        x_value = np.random.rand(batch_size, input_size)
        h_value = np.random.rand(batch_size, cell_size)

        # Output from the basic GRU cell implementation.
        with tf.variable_scope("basic", initializer=initializer):
            output = tf.nn.rnn_cell.GRUCell(cell_size)(x, h)
            sess.run([tf.initialize_all_variables()])
            basic_res = sess.run([output], {x: x_value, h: h_value})

        # Output from the block GRU cell implementation.
        with tf.variable_scope("block", initializer=initializer):
            output = gru_ops.GRUBlockCell(cell_size)(x, h)
            sess.run([tf.initialize_all_variables()])
            block_res = sess.run([output], {x: x_value, h: h_value})

        self.assertEqual(len(block_res), len(basic_res))
        for block, basic in zip(block_res, basic_res):
            self.assertAllClose(block, basic)
def moving_average(value, window):
    value = tf.to_float(value)
    shape = value.get_shape()

    queue_init = tf.zeros(tf.TensorShape(window).concatenate(shape))
    total_init = tf.zeros(shape)
    num_init = tf.constant(0, dtype=tf.float32)

    queue = tf.FIFOQueue(window, [tf.float32], shapes=[shape])
    total = tf.Variable(total_init, trainable=False)
    num = tf.Variable(num_init, trainable=False)

    init = tf.cond(
        tf.equal(queue.size(), 0),
        lambda: tf.group(
            queue.enqueue_many(queue_init),
            total.assign(total_init),
            num.assign(num_init)),
        lambda: tf.no_op())

    with tf.control_dependencies([init]):
        total_ = total + value - queue.dequeue()
        num_ = num + 1
        value_averaged = total_ / (tf.minimum(num_, window) + EPSILON)

    with tf.control_dependencies([queue.enqueue([value]),
                                  total.assign(total_),
                                  num.assign(num_)]):
        return tf.identity(value_averaged)
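# Usage sketch (hypothetical): moving_average references a module-level EPSILON
# constant that the source does not show; a small value such as 1e-8 is assumed
# here. Each sess.run of the returned tensor pushes one new value through the
# queue and yields the running window average.
EPSILON = 1e-8
loss = tf.placeholder(tf.float32, [])
loss_avg = moving_average(loss, window=10)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for step, l in enumerate([3.0, 1.0, 2.0]):
        print(step, sess.run(loss_avg, feed_dict={loss: l}))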
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps,
             drop_out_rate, bias_init_vector=None):
    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.drop_out_rate = drop_out_rate

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name='Wemb')
    # self.Wemb_W = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name='Wemb_W')
    # self.Wemb_b = tf.Variable(tf.random_uniform([dim_hidden], -0.1, 0.1), name='Wemb_b')

    # self.lstm3 = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm3 = rnn_cell.LSTMCell(self.dim_hidden, 2 * self.dim_hidden, use_peepholes=True)
    self.lstm3_dropout = rnn_cell.DropoutWrapper(self.lstm3, output_keep_prob=1 - self.drop_out_rate)

    self.encode_image_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_image_W')
    self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_image_b')

    self.embed_att_w = tf.Variable(tf.random_uniform([dim_hidden, 1], -0.1, 0.1), name='embed_att_w')
    self.embed_att_Wa = tf.Variable(tf.random_uniform([dim_hidden, dim_hidden], -0.1, 0.1), name='embed_att_Wa')
    self.embed_att_Ua = tf.Variable(tf.random_uniform([dim_hidden, dim_hidden], -0.1, 0.1), name='embed_att_Ua')
    self.embed_att_ba = tf.Variable(tf.zeros([dim_hidden]), name='embed_att_ba')

    self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')

    self.embed_nn_Wp = tf.Variable(tf.random_uniform([3 * dim_hidden, dim_hidden], -0.1, 0.1), name='embed_nn_Wp')
    self.embed_nn_bp = tf.Variable(tf.zeros([dim_hidden]), name='embed_nn_bp')
def inference(images, hidden1_units):
    """Build the MNIST model up to where it may be used for inference.

    Args:
        images: Images placeholder, from inputs().
        hidden1_units: Size of the first hidden layer.

    Returns:
        softmax_linear: Output tensor with the computed logits.
    """
    # Hidden 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Linear
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, NUM_CLASSES],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden1, weights) + biases
    return logits
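# Wiring sketch (hypothetical): how a caller might attach the logits above to a
# training objective. The placeholders, loss, and optimizer choices here are
# illustrative assumptions, not taken from the source; IMAGE_PIXELS is the same
# constant the function references.
images = tf.placeholder(tf.float32, [None, IMAGE_PIXELS])
labels = tf.placeholder(tf.int64, [None])
logits = inference(images, hidden1_units=128)
loss = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(loss)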
def get_idx_map(shape):
    """Get index map for an image.

    Args:
        shape: [B, T, H, W] or [B, H, W]

    Returns:
        idx: [B, T, H, W, 2] or [B, H, W, 2]
    """
    s = shape
    ndims = tf.shape(s)
    wdim = ndims - 1
    hdim = ndims - 2
    idx_shape = tf.concat(0, [s, tf.constant([1])])
    ones_h = tf.ones(hdim - 1, dtype='int32')
    ones_w = tf.ones(wdim - 1, dtype='int32')
    h_shape = tf.concat(0, [ones_h, tf.constant([-1]), tf.constant([1, 1])])
    w_shape = tf.concat(0, [ones_w, tf.constant([-1]), tf.constant([1])])
    idx_y = tf.zeros(idx_shape, dtype='float')
    idx_x = tf.zeros(idx_shape, dtype='float')
    h = tf.slice(s, ndims - 2, [1])
    w = tf.slice(s, ndims - 1, [1])
    idx_y += tf.reshape(tf.to_float(tf.range(h[0])), h_shape)
    idx_x += tf.reshape(tf.to_float(tf.range(w[0])), w_shape)
    idx = tf.concat(ndims[0], [idx_y, idx_x])
    return idx
def testSampleFromDiscretizedMixLogistic(self):
    batch = 2
    height = 4
    width = 4
    num_mixtures = 5
    seed = 42
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.ones([batch, height, width, num_mixtures * 3]) * -1e8
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    locs_0 = locs[..., :3]
    expected_sample = tf.clip_by_value(locs_0, -1., 1.)

    actual_sample = common_layers.sample_from_discretized_mix_logistic(
        pred, seed=seed)
    actual_sample_val, expected_sample_val = self.evaluate(
        [actual_sample, expected_sample])
    # Use a low tolerance: samples numerically differ, as the actual
    # implementation clips log-scales so they always contribute to sampling.
    self.assertAllClose(actual_sample_val, expected_sample_val, atol=1e-2)
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps,
             drop_out_rate, bias_init_vector=None):
    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.drop_out_rate = drop_out_rate

    with tf.device("/gpu:2"):
        self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_hidden], -0.1, 0.1), name='Wemb')

    # self.lstm1 = rnn_cell.BasicLSTMCell(dim_hidden)
    # self.lstm2 = rnn_cell.BasicLSTMCell(dim_hidden)
    self.lstm1 = rnn_cell.LSTMCell(self.dim_hidden, self.dim_hidden, use_peepholes=True)
    self.lstm1_dropout = rnn_cell.DropoutWrapper(self.lstm1, output_keep_prob=1 - self.drop_out_rate)
    self.lstm2 = rnn_cell.LSTMCell(self.dim_hidden, self.dim_hidden, use_peepholes=True)
    self.lstm2_dropout = rnn_cell.DropoutWrapper(self.lstm2, output_keep_prob=1 - self.drop_out_rate)

    # W is Weight, b is Bias
    self.encode_image_W = tf.Variable(
        tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_image_W')
    self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_image_b')

    self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')
    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = tf.Variable(tf.zeros([n_words]), name='embed_word_b')
def __init__(self, n_input, n_latent, n_hidden_enc):
    # initialize network
    self.prng = numpy.random.RandomState()

    sigma_init = 0.01

    x = tf.placeholder(tf.float32, [None, n_input], name='input')

    # encoder
    # x -> hidden layer
    W_xh = tf.Variable(tf.random_normal([n_input, n_hidden_enc],
                                        mean=0., stddev=sigma_init, dtype=tf.float32))
    b_xh = tf.Variable(tf.zeros([n_hidden_enc], dtype=tf.float32))

    # hidden layer -> latent variables (mu & log sigma^2)
    W_hmu = tf.Variable(tf.random_normal([n_hidden_enc, n_latent],
                                         mean=0., stddev=sigma_init, dtype=tf.float32))
    b_hsigma = tf.Variable(tf.zeros([n_latent], dtype=tf.float32))

    # decoder
    W_zx = tf.Variable(tf.random_normal([n_latent, n_input],
                                        mean=0., stddev=sigma_init, dtype=tf.float32))
    b_zx = tf.Variable(tf.zeros([n_input], dtype=tf.float32))

    # create functions
    h_encoder = tf.nn.relu(tf.matmul(x, W_xh) + b_xh)
    pass
def testDiscretizedMixLogisticLoss(self):
    batch = 2
    height = 4
    width = 4
    channels = 3
    num_mixtures = 5
    logits = tf.concat(  # assign all probability mass to first component
        [tf.ones([batch, height, width, 1]) * 1e8,
         tf.zeros([batch, height, width, num_mixtures - 1])],
        axis=-1)
    locs = tf.random_uniform([batch, height, width, num_mixtures * 3],
                             minval=-.9, maxval=.9)
    log_scales = tf.random_uniform([batch, height, width, num_mixtures * 3],
                                   minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros([batch, height, width, num_mixtures * 3]))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform([batch, height, width, channels],
                               minval=-.9, maxval=.9)
    locs_0 = locs[..., :3]
    log_scales_0 = log_scales[..., :3]

    centered_labels = labels - locs_0
    inv_stdv = tf.exp(-log_scales_0)
    plus_in = inv_stdv * (centered_labels + 1. / 255.)
    min_in = inv_stdv * (centered_labels - 1. / 255.)
    cdf_plus = tf.nn.sigmoid(plus_in)
    cdf_min = tf.nn.sigmoid(min_in)
    expected_loss = -tf.reduce_sum(tf.log(cdf_plus - cdf_min), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_loss_val, expected_loss_val = self.evaluate(
        [actual_loss, expected_loss])
    self.assertAllClose(actual_loss_val, expected_loss_val, rtol=1e-5)
def recover_feeling(checkpoint):
    # Set up variables
    in_sentence = tf.placeholder(tf.float32, [None, 140])

    weight = tf.Variable(tf.zeros([140, 6]))
    biases = tf.Variable(tf.zeros([6]))
    global_step = tf.Variable(0, name='global_step', trainable=False)

    # y = softmax(Wx + b)
    y = tf.nn.softmax(tf.matmul(in_sentence, weight) + biases)
    y_ = tf.placeholder(tf.float32, [None, 6])
    sess = tf.InteractiveSession()

    # Restore the model.
    saver = tf.train.Saver()
    saver.restore(sess, checkpoint)

    # Model restored; load the vocabulary.
    vocab, tmp_vocab = read_vocabulary('data/emotion/vocabulary.txt')

    # Convert a few sentences into arrays of vectors.
    vec_list1 = convert_sentence_to_vec_list('你今天感觉怎么样', vocab, 140)
    vec_list2 = convert_sentence_to_vec_list('高兴啊', vocab, 140)
    vec_list_final = [np.array(vec_list1), np.array(vec_list2)]
    print(vec_list_final)

    # Run the inputs through the session.
    data = np.array(vec_list_final)
    result = sess.run(y, feed_dict={in_sentence: data})
    print(result)
def main():
    sess = tf.Session()

    # 3-bit binary to decimal
    x = tf.placeholder(tf.float32, [None, 3])
    w = tf.Variable(tf.zeros([3, 8]))
    b = tf.Variable(tf.zeros([8]))
    y = tf.nn.softmax(tf.matmul(x, w) + b)
    y_ = tf.placeholder(tf.float32, [None, 8])
    cross_entropy = -tf.reduce_sum(y_ * tf.log(y))
    train_step = tf.train.GradientDescentOptimizer(0.05).minimize(cross_entropy)

    sess.run(tf.initialize_all_variables())

    for i in range(1000):
        train_step.run({x: [[0, 0, 0]], y_: [[1, 0, 0, 0, 0, 0, 0, 0]]}, session=sess)
        train_step.run({x: [[1, 0, 0]], y_: [[0, 1, 0, 0, 0, 0, 0, 0]]}, session=sess)
        train_step.run({x: [[0, 1, 0]], y_: [[0, 0, 1, 0, 0, 0, 0, 0]]}, session=sess)
        train_step.run({x: [[1, 1, 0]], y_: [[0, 0, 0, 1, 0, 0, 0, 0]]}, session=sess)
        train_step.run({x: [[0, 0, 1]], y_: [[0, 0, 0, 0, 1, 0, 0, 0]]}, session=sess)
        train_step.run({x: [[1, 0, 1]], y_: [[0, 0, 0, 0, 0, 1, 0, 0]]}, session=sess)
        train_step.run({x: [[0, 1, 1]], y_: [[0, 0, 0, 0, 0, 0, 1, 0]]}, session=sess)
        train_step.run({x: [[1, 1, 1]], y_: [[0, 0, 0, 0, 0, 0, 0, 1]]}, session=sess)

    ## Check whether the prediction closest to 1 is correct (mean accuracy)
    # correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    # accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

    print(sess.run(y, feed_dict={x: [[0, 0, 0]]}))
    print(sess.run(y, feed_dict={x: [[1, 0, 0]]}))
    print(sess.run(y, feed_dict={x: [[0, 1, 0]]}))
    print(sess.run(y, feed_dict={x: [[1, 1, 0]]}))
    print(sess.run(y, feed_dict={x: [[0, 0, 1]]}))

    return 0
def testLSTMFusedCell(self):
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()) as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m0 = tf.zeros([1, 2])
            m1 = tf.zeros([1, 2])
            m2 = tf.zeros([1, 2])
            m3 = tf.zeros([1, 2])
            g, ((out_m0, out_m1), (out_m2, out_m3)) = tf.nn.rnn_cell.MultiRNNCell(
                [tf.contrib.rnn.LSTMFusedCell(2)] * 2,
                state_is_tuple=True)(x, ((m0, m1), (m2, m3)))
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, out_m0, out_m1, out_m2, out_m3],
                           {x.name: np.array([[1., 1.]]),
                            m0.name: 0.1 * np.ones([1, 2]),
                            m1.name: 0.1 * np.ones([1, 2]),
                            m2.name: 0.1 * np.ones([1, 2]),
                            m3.name: 0.1 * np.ones([1, 2])})
            self.assertEqual(len(res), 5)
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            # These numbers are from testBasicLSTMCell and only test c/h.
            self.assertAllClose(res[1], [[0.68967271, 0.68967271]])
            self.assertAllClose(res[2], [[0.44848421, 0.44848421]])
            self.assertAllClose(res[3], [[0.39897051, 0.39897051]])
            self.assertAllClose(res[4], [[0.24024698, 0.24024698]])
def autoencoder_contd(input_dim, representation):
    x = tf.placeholder(tf.float32, [None, input_dim])
    high_decW = tf.Variable(
        initial_value=tf.random_normal(
            [representation, input_dim],
            -math.sqrt(6.0 / (input_dim + representation)),
            math.sqrt(6.0 / (input_dim + representation))),
        dtype=tf.float32, name='high_decW')
    # high_encW = tf.transpose(high_decW)
    high_encW = tf.Variable(
        initial_value=tf.random_normal(
            [input_dim, representation],
            -math.sqrt(6.0 / (input_dim + representation)),
            math.sqrt(6.0 / (input_dim + representation))),
        name='high_encW')
    high_encb = tf.Variable(tf.zeros([representation]), name='high_encb')
    z = tf.nn.sigmoid(tf.matmul(x, high_encW) + high_encb)
    hidden_weights = high_encW
    high_decb = tf.Variable(tf.zeros([input_dim]), name='high_decb')
    y = tf.nn.sigmoid(tf.matmul(z, high_decW) + high_decb)
    cost = tf.nn.l2_loss(x - y)
    loss_per_pixel = tf.reduce_mean(tf.abs(x - y))
    return {'x': x, 'z': z, 'y': y, 'cost': cost,
            'weights': hidden_weights,
            'encW': high_encW, 'decW': high_decW,
            'encb': high_encb, 'decb': high_decb,
            'ppx': loss_per_pixel}
def testGridLSTMCell(self):
    with self.test_session() as sess:
        num_units = 8
        state_size = num_units * 2
        batch_size = 3
        input_size = 4
        feature_size = 2
        frequency_skip = 1
        num_shifts = (input_size - feature_size) // frequency_skip + 1
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([batch_size, input_size])
            m = tf.zeros([batch_size, state_size * num_shifts])
            output, state = tf.contrib.rnn.GridLSTMCell(
                num_units=num_units, feature_size=feature_size,
                frequency_skip=frequency_skip, forget_bias=1.0)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([output, state],
                           {x.name: np.array([[1., 1., 1., 1.],
                                              [2., 2., 2., 2.],
                                              [3., 3., 3., 3.]]),
                            m.name: 0.1 * np.ones((batch_size, state_size * num_shifts))})
            self.assertEqual(len(res), 2)
            # The numbers in results were not calculated, this is mostly just a
            # smoke test.
            self.assertEqual(res[0].shape, (batch_size, num_units * num_shifts * 2))
            self.assertEqual(res[1].shape, (batch_size, state_size * num_shifts))
            # Different inputs so different outputs and states
            for i in range(1, batch_size):
                self.assertTrue(
                    float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) > 1e-6)
                self.assertTrue(
                    float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) > 1e-6)
def autoencoder(d, c=5, tied_weights=False):
    '''
    An autoencoder network with one hidden layer (containing the encoding),
    and sigmoid activation functions.

    Args:
        d: dimension of input.
        c: dimension of code.
        tied_weights: True if w1^T = w2.

    Returns:
        Dictionary containing the input placeholder Tensor and the loss Variable.
    '''
    inputs = tf.placeholder(tf.float32, shape=[None, d], name='input')
    w1 = tf.Variable(tf.truncated_normal([d, c], stddev=1.0 / math.sqrt(d)))
    b1 = tf.Variable(tf.zeros([c]))
    w2 = tf.Variable(tf.truncated_normal([c, d], stddev=1.0 / math.sqrt(c)))  # TODO: Implement tied weights
    b2 = tf.Variable(tf.zeros([d]))
    code = tf.nn.sigmoid(tf.matmul(inputs, w1) + b1, name='encoding')
    reconstruction = tf.nn.sigmoid(tf.matmul(code, w2) + b2, name='reconstruction')
    loss = tf.reduce_mean(tf.square(reconstruction - inputs))
    tf.scalar_summary('loss', loss)
    return {'inputs': inputs, 'loss': loss}
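# Training sketch (hypothetical): one gradient step on the autoencoder above.
# The optimizer choice and the random batch are illustrative assumptions (numpy
# is assumed imported as np); the initializer call matches the TF 0.x era of
# tf.scalar_summary used in the function.
net = autoencoder(d=784, c=32)
train_op = tf.train.AdamOptimizer(1e-3).minimize(net['loss'])
with tf.Session() as sess:
    sess.run(tf.initialize_all_variables())
    batch = np.random.rand(64, 784).astype(np.float32)
    _, l = sess.run([train_op, net['loss']], feed_dict={net['inputs']: batch})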
def testBasicLSTMCell(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m = tf.zeros([1, 8])
            g, out_m = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, out_m],
                           {x.name: np.array([[1., 1.]]),
                            m.name: 0.1 * np.ones([1, 8])})
            self.assertEqual(len(res), 2)
            # The numbers in results were not calculated, this is just a smoke test.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem = np.array([[0.68967271, 0.68967271,
                                      0.44848421, 0.44848421,
                                      0.39897051, 0.39897051,
                                      0.24024698, 0.24024698]])
            self.assertAllClose(res[1], expected_mem)
        with tf.variable_scope("other", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 3])  # Test BasicLSTMCell with input_size != num_units.
            m = tf.zeros([1, 4])
            g, out_m = tf.nn.rnn_cell.BasicLSTMCell(2, input_size=3)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, out_m],
                           {x.name: np.array([[1., 1., 1.]]),
                            m.name: 0.1 * np.ones([1, 4])})
            self.assertEqual(len(res), 2)
def testCoupledInputForgetGateLSTMCell(self):
    with self.test_session() as sess:
        num_units = 2
        state_size = num_units * 2
        batch_size = 3
        input_size = 4
        expected_output = np.array(
            [[0.121753, 0.121753],
             [0.103349, 0.103349],
             [0.100178, 0.100178]],
            dtype=np.float32)
        expected_state = np.array(
            [[0.137523, 0.137523, 0.121753, 0.121753],
             [0.105450, 0.105450, 0.103349, 0.103349],
             [0.100742, 0.100742, 0.100178, 0.100178]],
            dtype=np.float32)
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([batch_size, input_size])
            m = tf.zeros([batch_size, state_size])
            output, state = tf.contrib.rnn.CoupledInputForgetGateLSTMCell(
                num_units=num_units, forget_bias=1.0)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([output, state],
                           {x.name: np.array([[1., 1., 1., 1.],
                                              [2., 2., 2., 2.],
                                              [3., 3., 3., 3.]]),
                            m.name: 0.1 * np.ones((batch_size, state_size))})
            # This is a smoke test: Only making sure expected values didn't change.
            self.assertEqual(len(res), 2)
            self.assertAllClose(res[0], expected_output)
            self.assertAllClose(res[1], expected_state)
def testLSTMCell(self):
    with self.test_session() as sess:
        num_units = 8
        num_proj = 6
        state_size = num_units + num_proj
        batch_size = 3
        input_size = 2
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([batch_size, input_size])
            m = tf.zeros([batch_size, state_size])
            output, state = tf.nn.rnn_cell.LSTMCell(
                num_units=num_units, input_size=input_size,
                num_proj=num_proj, forget_bias=1.0)(x, m)
            sess.run([tf.initialize_all_variables()])
            res = sess.run([output, state],
                           {x.name: np.array([[1., 1.], [2., 2.], [3., 3.]]),
                            m.name: 0.1 * np.ones((batch_size, state_size))})
            self.assertEqual(len(res), 2)
            # The numbers in results were not calculated, this is mostly just a
            # smoke test.
            self.assertEqual(res[0].shape, (batch_size, num_proj))
            self.assertEqual(res[1].shape, (batch_size, state_size))
            # Different inputs so different outputs and states
            for i in range(1, batch_size):
                self.assertTrue(
                    float(np.linalg.norm((res[0][0, :] - res[0][i, :]))) > 1e-6)
                self.assertTrue(
                    float(np.linalg.norm((res[1][0, :] - res[1][i, :]))) > 1e-6)
def inference(images, hidden1_units, hidden2_units):
    """Build a feed-forward neural network model.

    Args:
        images: input image data
        hidden1_units: number of neurons in the first hidden layer
        hidden2_units: number of neurons in the second hidden layer

    Returns:
        softmax_linear: output tensor with the computed result
    """
    # Hidden layer 1
    with tf.name_scope('hidden1'):
        weights = tf.Variable(
            tf.truncated_normal([IMAGE_PIXELS, hidden1_units],
                                stddev=1.0 / math.sqrt(float(IMAGE_PIXELS))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden1_units]), name='biases')
        hidden1 = tf.nn.relu(tf.matmul(images, weights) + biases)
    # Hidden layer 2
    with tf.name_scope('hidden2'):
        weights = tf.Variable(
            tf.truncated_normal([hidden1_units, hidden2_units],
                                stddev=1.0 / math.sqrt(float(hidden1_units))),
            name='weights')
        biases = tf.Variable(tf.zeros([hidden2_units]), name='biases')
        hidden2 = tf.nn.relu(tf.matmul(hidden1, weights) + biases)
    # Linear output layer
    with tf.name_scope('softmax_linear'):
        weights = tf.Variable(
            tf.truncated_normal([hidden2_units, NUM_CLASSES]),
            name='weights')
        biases = tf.Variable(tf.zeros([NUM_CLASSES]), name='biases')
        logits = tf.matmul(hidden2, weights) + biases
    return logits
def testBasicLSTMCellStateTupleType(self):
    with self.test_session():
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m0 = (tf.zeros([1, 2]),) * 2
            m1 = (tf.zeros([1, 2]),) * 2
            cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2, state_is_tuple=True)
            self.assertTrue(isinstance(cell.state_size, tuple))
            self.assertTrue(isinstance(cell.state_size[0],
                                       tf.nn.rnn_cell.LSTMStateTuple))
            self.assertTrue(isinstance(cell.state_size[1],
                                       tf.nn.rnn_cell.LSTMStateTuple))

            # Pass in regular tuples
            _, (out_m0, out_m1) = cell(x, (m0, m1))
            self.assertTrue(isinstance(out_m0, tf.nn.rnn_cell.LSTMStateTuple))
            self.assertTrue(isinstance(out_m1, tf.nn.rnn_cell.LSTMStateTuple))

            # Pass in LSTMStateTuples
            tf.get_variable_scope().reuse_variables()
            zero_state = cell.zero_state(1, tf.float32)
            self.assertTrue(isinstance(zero_state, tuple))
            self.assertTrue(isinstance(zero_state[0], tf.nn.rnn_cell.LSTMStateTuple))
            self.assertTrue(isinstance(zero_state[1], tf.nn.rnn_cell.LSTMStateTuple))
            _, (out_m0, out_m1) = cell(x, zero_state)
            self.assertTrue(isinstance(out_m0, tf.nn.rnn_cell.LSTMStateTuple))
            self.assertTrue(isinstance(out_m1, tf.nn.rnn_cell.LSTMStateTuple))
def __init__(self, name, input_size, output_size):
    with tf.name_scope("rbm_" + name):
        self.weights = tf.Variable(
            tf.truncated_normal([input_size, output_size],
                                stddev=1.0 / math.sqrt(float(input_size))),
            name="weights")
        self.v_bias = tf.Variable(tf.zeros([input_size]), name="v_bias")
        self.h_bias = tf.Variable(tf.zeros([output_size]), name="h_bias")
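# Usage sketch (hypothetical; the enclosing class name is not shown in the
# source and is assumed here to be RBM). Layer sizes are illustrative, e.g.
# stacking two RBM layers for greedy pre-training.
rbm1 = RBM('layer1', input_size=784, output_size=256)
rbm2 = RBM('layer2', input_size=256, output_size=64)
print(rbm1.weights.name)  # scoped by the name_scope, e.g. 'rbm_layer1/weights:0'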
def model(images, inits, num_iterations=4, num_patches=68,
          patch_shape=(24, 24), num_channels=3):
    batch_size = images.get_shape().as_list()[0]
    hidden_state = tf.zeros((batch_size, 512))
    dx = tf.zeros((batch_size, num_patches, 2))
    endpoints = {}
    dxs = []

    for step in range(num_iterations):
        with tf.device('/cpu:0'):
            patches = tf.image.extract_patches(images, tf.constant(patch_shape),
                                               inits + dx)
        patches = tf.reshape(patches, (batch_size * num_patches,
                                       patch_shape[0], patch_shape[1],
                                       num_channels))
        endpoints['patches'] = patches

        with tf.variable_scope('convnet', reuse=step > 0):
            net = conv_model(patches)
            ims = net['concat']

        ims = tf.reshape(ims, (batch_size, -1))

        with tf.variable_scope('rnn', reuse=step > 0) as scope:
            hidden_state = slim.ops.fc(tf.concat(1, [ims, hidden_state]), 512,
                                       activation=tf.tanh)
            prediction = slim.ops.fc(hidden_state, num_patches * 2, scope='pred',
                                     activation=None)
            endpoints['prediction'] = prediction
        prediction = tf.reshape(prediction, (batch_size, num_patches, 2))
        dx += prediction
        dxs.append(dx)

    return inits + dx, dxs, endpoints
def testGradientsAsVariables(self):
    for dtype in [tf.half, tf.float32, tf.float64]:
        with self.test_session() as sess:
            var0 = tf.Variable([1.0, 2.0], dtype=dtype)
            var1 = tf.Variable([3.0, 4.0], dtype=dtype)
            cost = 5 * var0 + 3 * var1
            global_step = tf.Variable(tf.zeros([], tf.int64), name='global_step')
            sgd_op = tf.train.GradientDescentOptimizer(3.0)
            grads_and_vars = sgd_op.compute_gradients(cost, [var0, var1])
            # Convert gradients to tf.Variables
            converted_grads = [tf.Variable(tf.zeros([2], dtype))
                               for _ in grads_and_vars]
            convert_ops = [tf.assign(converted_grads[i], gv[0])
                           for i, gv in enumerate(grads_and_vars)]
            converted_grads_and_vars = list(zip(converted_grads, [var0, var1]))
            opt_op = sgd_op.apply_gradients(converted_grads_and_vars, global_step)

            tf.global_variables_initializer().run()
            # Run convert_ops to convert the gradients.
            sess.run(convert_ops)
            # Fetch params to validate initial values
            self.assertAllClose([1.0, 2.0], var0.eval())
            self.assertAllClose([3.0, 4.0], var1.eval())
            # Run 1 step of sgd through optimizer
            opt_op.run()
            # Validate updated params
            self.assertAllClose([-14., -13.], var0.eval())
            self.assertAllClose([-6., -5.], var1.eval())
def testBasicLSTMCellWithStateTuple(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m0 = tf.zeros([1, 4])
            m1 = tf.zeros([1, 4])
            cell = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.BasicLSTMCell(2)] * 2, state_is_tuple=True)
            g, (out_m0, out_m1) = cell(x, (m0, m1))
            sess.run([tf.initialize_all_variables()])
            res = sess.run([g, out_m0, out_m1],
                           {x.name: np.array([[1., 1.]]),
                            m0.name: 0.1 * np.ones([1, 4]),
                            m1.name: 0.1 * np.ones([1, 4])})
            self.assertEqual(len(res), 3)
            # The numbers in results were not calculated, this is just a smoke test.
            # Note, however, these values should match the original
            # version having state_is_tuple=False.
            self.assertAllClose(res[0], [[0.24024698, 0.24024698]])
            expected_mem0 = np.array([[0.68967271, 0.68967271,
                                       0.44848421, 0.44848421]])
            expected_mem1 = np.array([[0.39897051, 0.39897051,
                                       0.24024698, 0.24024698]])
            self.assertAllClose(res[1], expected_mem0)
            self.assertAllClose(res[2], expected_mem1)
def testCompatibleNames(self):
    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
        cell = tf.nn.rnn_cell.LSTMCell(10)
        pcell = tf.nn.rnn_cell.LSTMCell(10, use_peepholes=True)
        inputs = [tf.zeros([4, 5])] * 6
        tf.nn.rnn(cell, inputs, dtype=tf.float32, scope="basic")
        tf.nn.rnn(pcell, inputs, dtype=tf.float32, scope="peephole")
        basic_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
        cell = tf.contrib.rnn.LSTMBlockCell(10, use_compatible_names=True)
        pcell = tf.contrib.rnn.LSTMBlockCell(
            10, use_peephole=True, use_compatible_names=True)
        inputs = [tf.zeros([4, 5])] * 6
        tf.nn.rnn(cell, inputs, dtype=tf.float32, scope="basic")
        tf.nn.rnn(pcell, inputs, dtype=tf.float32, scope="peephole")
        block_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    with self.test_session(use_gpu=self._use_gpu, graph=tf.Graph()):
        cell = tf.contrib.rnn.LSTMBlockFusedCell(10)
        pcell = tf.contrib.rnn.LSTMBlockFusedCell(10, use_peephole=True)
        inputs = [tf.zeros([4, 5])] * 6
        cell(inputs, dtype=tf.float32, scope="basic/LSTMCell")
        pcell(inputs, dtype=tf.float32, scope="peephole/LSTMCell")
        fused_names = {v.name: v.get_shape() for v in tf.trainable_variables()}

    self.assertEqual(basic_names, block_names)
    self.assertEqual(basic_names, fused_names)
def testMultiRNNCellWithStateTuple(self):
    with self.test_session() as sess:
        with tf.variable_scope("root", initializer=tf.constant_initializer(0.5)):
            x = tf.zeros([1, 2])
            m_bad = tf.zeros([1, 4])
            m_good = (tf.zeros([1, 2]), tf.zeros([1, 2]))

            # Test that a non-tuple state raises an error.
            with self.assertRaisesRegexp(ValueError, "Expected state .* a tuple"):
                tf.nn.rnn_cell.MultiRNNCell(
                    [tf.nn.rnn_cell.GRUCell(2)] * 2, state_is_tuple=True)(x, m_bad)

            _, ml = tf.nn.rnn_cell.MultiRNNCell(
                [tf.nn.rnn_cell.GRUCell(2)] * 2, state_is_tuple=True)(x, m_good)

            sess.run([tf.initialize_all_variables()])
            res = sess.run(ml,
                           {x.name: np.array([[1., 1.]]),
                            m_good[0].name: np.array([[0.1, 0.1]]),
                            m_good[1].name: np.array([[0.1, 0.1]])})

            # The numbers in results were not calculated, this is just a
            # smoke test. However, these numbers should match those of
            # the test testMultiRNNCell.
            self.assertAllClose(res[0], [[0.175991, 0.175991]])
            self.assertAllClose(res[1], [[0.13248, 0.13248]])
def embedding(inputs,
              vocab_size,
              num_units,
              zero_pad=True,
              scale=True,
              l2_reg=0.0,
              scope="embedding",
              with_t=False,
              reuse=None):
    '''Embeds a given tensor.

    Args:
      inputs: A `Tensor` with type `int32` or `int64` containing the ids
        to be looked up in `lookup table`.
      vocab_size: An int. Vocabulary size.
      num_units: An int. Number of embedding hidden units.
      zero_pad: A boolean. If True, all the values of the first row (id 0)
        should be constant zeros.
      scale: A boolean. If True, the outputs are multiplied by sqrt(num_units).
      l2_reg: A float. L2 regularization strength for the lookup table.
      scope: Optional scope for `variable_scope`.
      with_t: A boolean. If True, also return the lookup table.
      reuse: Boolean, whether to reuse the weights of a previous layer
        by the same name.

    Returns:
      A `Tensor` with one more rank than inputs's. The last dimensionality
      should be `num_units`.

    For example,

    ```
    import tensorflow as tf

    inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
    outputs = embedding(inputs, 6, 2, zero_pad=True)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(outputs))
    >>
    [[[ 0.          0.        ]
      [ 0.09754146  0.67385566]
      [ 0.37864095 -0.35689294]]

     [[-1.01329422 -1.09939694]
      [ 0.7521342   0.38203377]
      [-0.04973143 -0.06210355]]]
    ```

    ```
    import tensorflow as tf

    inputs = tf.to_int32(tf.reshape(tf.range(2*3), (2, 3)))
    outputs = embedding(inputs, 6, 2, zero_pad=False)
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        print(sess.run(outputs))
    >>
    [[[-0.19172323 -0.39159766]
      [-0.43212751 -0.66207761]
      [ 1.03452027 -0.26704335]]

     [[-0.11634696 -0.35983452]
      [ 0.50208133  0.53509563]
      [ 1.22204471 -0.96587461]]]
    ```
    '''
    with tf.variable_scope(scope, reuse=reuse):
        lookup_table = tf.get_variable('lookup_table',
                                       dtype=tf.float32,
                                       shape=[vocab_size, num_units],
                                       # initializer=tf.contrib.layers.xavier_initializer(),
                                       regularizer=tf.contrib.layers.l2_regularizer(l2_reg))
        if zero_pad:
            lookup_table = tf.concat((tf.zeros(shape=[1, num_units]),
                                      lookup_table[1:, :]), 0)
        outputs = tf.nn.embedding_lookup(lookup_table, inputs)

        if scale:
            outputs = outputs * (num_units ** 0.5)

    if with_t:
        return outputs, lookup_table
    return outputs
u = tf.cast(e[0][0], tf.int32)
v = tf.cast(e[0][1], tf.int32)

y_t = []
h = tf.placeholder(dtype=tf.float32, shape=[1, 10])
for i in range(84):
    temp1 = tf.gather_nd(adj, (i, u))
    temp2 = tf.gather(y, u)
    temp3 = tf.gather_nd(adj, (i, v))
    temp4 = tf.gather(y, v)
    temp5 = tf.multiply(temp1, temp2)
    temp6 = tf.multiply(temp3, temp4)
    intermediate = tf.add_n([temp5, temp6])
    # print "Debug intermediate size", intermediate.get_shape(), h_inter.get_shape()
    # intermediate = tf.add_n(tf.multiply(tf.gather_nd(self.adj, (i, u)), tf.gather(y, u)),
    #                         tf.multiply(tf.gather_nd(self.adj, (i, v)), tf.gather(y, v)))
    first = tf.concat([[y[i]], [intermediate], h], axis=1)
    second = tf.concat([tf.zeros([1, 2 * 1]), h], axis=1)
    y_t.append(tf.concat([tf.add(first, second), b], axis=1))

c = tf.stack(y_t)

sess = tf.Session(config=tf.ConfigProto(log_device_placement=True))
# h = tf.placeholder(dtype=tf.float32, shape=[1])
# Runs the op.
summary_writer = tf.summary.FileWriter(
    'logs/' + datetime.now().isoformat().replace(':', '-'), sess.graph)
print("len", len(tf.get_default_graph().get_operations()))
t1 = time.time()
print(sess.run([c], feed_dict={y: np.ones([84, 1]),
                               b: np.zeros([1, 5]),
                               h: np.ones([1, 10]),
                               e: [[1, 2, 0, 0]],
                               adj: np.ones([84, 84])}))
t2 = time.time()
def __init__(self, data_dir, embed_dim=100, combination_method='simple',
             dropout=0.5, neg_weight=0.5):

    if combination_method.lower() not in ['simple', 'matrix']:
        raise NotImplementedError(
            "ProjE does not support using %s as combination method." % combination_method)

    self.__combination_method = combination_method

    self.__embed_dim = embed_dim
    self.__initialized = False

    self.__trainable = list()
    self.__dropout = dropout

    # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
    #     self.__n_entity = len(f.readlines())

    # with codecs.open(os.path.join(data_dir, 'entity2id.txt'), 'r', encoding='utf-8') as f:
    #     self.__entity_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
    #     self.__id_entity_map = {v: k for k, v in self.__entity_id_map.items()}

    wikidata, reverse_dict, item_data, prop_data, wikidata_fanout_dict, child_par_dict = load_wikidata()
    print(len(wikidata))
    # global wikidata
    # wikidata_key_sample = random.sample(wikidata.keys(), 50000)
    # wikidata = dict((k, wikidata[k]) for k in wikidata_key_sample)

    self.__n_entity = len(wikidata)

    print("N_ENTITY: %d" % self.__n_entity)

    # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
    #     self.__n_relation = len(f.readlines())
    self.__n_relation = len(prop_data)

    # with codecs.open(os.path.join(data_dir, 'relation2id.txt'), 'r', encoding='utf-8') as f:
    #     self.__relation_id_map = {x.strip().split('\t')[0]: int(x.strip().split('\t')[1]) for x in f.readlines()}
    #     self.__id_relation_map = {v: k for k, v in self.__entity_id_map.items()}

    self.__relation_id_map = {pid: i for i, pid in enumerate(prop_data.keys())}
    self.__entity_id_map = {qid: i for i, qid in enumerate(wikidata.keys())}

    self.__id_relation_map = {i: pid for i, pid in enumerate(prop_data.keys())}
    self.__id_entity_map = {i: qid for i, qid in enumerate(wikidata.keys())}

    print("N_RELATION: %d" % self.__n_relation)

    # def load_triple(file_path):
    #     with codecs.open(file_path, 'r', encoding='utf-8') as f_triple:
    #         return np.asarray([[self.__entity_id_map[x.strip().split('\t')[0]],
    #                             self.__entity_id_map[x.strip().split('\t')[1]],
    #                             self.__relation_id_map[x.strip().split('\t')[2]]] for x in f_triple.readlines()],
    #                           dtype=np.int32)

    def load_triple():
        triples_arr = []
        for QID in wikidata:
            for pid in [p for p in wikidata[QID] if p in prop_data]:
                for qid in [q for q in wikidata[QID][pid]
                            if q in child_par_dict and q in self.__entity_id_map]:
                    triples_arr.append([self.__entity_id_map[QID],
                                        self.__entity_id_map[qid],
                                        self.__relation_id_map[pid]])
                    if len(triples_arr) > 100000:
                        return np.asarray(triples_arr)
        return np.asarray(triples_arr)

    def gen_hr_t(triple_data):
        hr_t = dict()
        for h, t, r in triple_data:
            if h not in hr_t:
                hr_t[h] = dict()
            if r not in hr_t[h]:
                hr_t[h][r] = set()
            hr_t[h][r].add(t)
        return hr_t

    def gen_tr_h(triple_data):
        tr_h = dict()
        for h, t, r in triple_data:
            if t not in tr_h:
                tr_h[t] = dict()
            if r not in tr_h[t]:
                tr_h[t][r] = set()
            tr_h[t][r].add(h)
        return tr_h

    triples_arr = load_triple()
    idx = np.random.permutation(np.arange(triples_arr.shape[0]))

    self.__train_triple = triples_arr[:int(0.7 * idx.shape[0])]
    self.__valid_triple = triples_arr[int(0.7 * idx.shape[0]):int(0.8 * idx.shape[0])]
    self.__test_triple = triples_arr[int(0.8 * idx.shape[0]):]

    # self.__train_triple = load_triple(os.path.join(data_dir, 'train.txt'))
    print("N_TRAIN_TRIPLES: %d" % self.__train_triple.shape[0])

    # self.__test_triple = load_triple(os.path.join(data_dir, 'test.txt'))
    print("N_TEST_TRIPLES: %d" % self.__test_triple.shape[0])

    # self.__valid_triple = load_triple(os.path.join(data_dir, 'valid.txt'))
    print("N_VALID_TRIPLES: %d" % self.__valid_triple.shape[0])

    self.__train_hr_t = gen_hr_t(self.__train_triple)
    self.__train_tr_h = gen_tr_h(self.__train_triple)
    self.__test_hr_t = gen_hr_t(self.__test_triple)
    self.__test_tr_h = gen_tr_h(self.__test_triple)
    print('flag 1')
    self.__hr_t = gen_hr_t(np.concatenate(
        [self.__train_triple, self.__test_triple, self.__valid_triple], axis=0))
    print('flag 2')
    self.__tr_h = gen_tr_h(np.concatenate(
        [self.__train_triple, self.__test_triple, self.__valid_triple], axis=0))
    print('flag 3')

    bound = 6 / math.sqrt(embed_dim)

    with tf.device('/cpu'):
        self.__ent_embedding = tf.get_variable(
            "ent_embedding", [self.__n_entity, embed_dim],
            initializer=tf.random_uniform_initializer(minval=-bound,
                                                      maxval=bound,
                                                      seed=345))
        self.__trainable.append(self.__ent_embedding)

        self.__rel_embedding = tf.get_variable(
            "rel_embedding", [self.__n_relation, embed_dim],
            initializer=tf.random_uniform_initializer(minval=-bound,
                                                      maxval=bound,
                                                      seed=346))
        self.__trainable.append(self.__rel_embedding)
        print('flag 4')
        if combination_method.lower() == 'simple':
            self.__hr_weighted_vector = tf.get_variable(
                "simple_hr_combination_weights", [embed_dim * 2],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=445))
            print('flag 5')
            self.__tr_weighted_vector = tf.get_variable(
                "simple_tr_combination_weights", [embed_dim * 2],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=445))
            print('flag 6')
            self.__trainable.append(self.__hr_weighted_vector)
            self.__trainable.append(self.__tr_weighted_vector)
            self.__hr_combination_bias = tf.get_variable(
                "combination_bias_hr", initializer=tf.zeros([embed_dim]))
            self.__tr_combination_bias = tf.get_variable(
                "combination_bias_tr", initializer=tf.zeros([embed_dim]))
            print('flag 7')
            self.__trainable.append(self.__hr_combination_bias)
            self.__trainable.append(self.__tr_combination_bias)
        else:
            self.__hr_combination_matrix = tf.get_variable(
                "matrix_hr_combination_layer", [embed_dim * 2, embed_dim],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=555))
            self.__tr_combination_matrix = tf.get_variable(
                "matrix_tr_combination_layer", [embed_dim * 2, embed_dim],
                initializer=tf.random_uniform_initializer(minval=-bound,
                                                          maxval=bound,
                                                          seed=555))
            self.__trainable.append(self.__hr_combination_matrix)
            self.__trainable.append(self.__tr_combination_matrix)
            self.__hr_combination_bias = tf.get_variable(
                "combination_bias_hr", initializer=tf.zeros([embed_dim]))
            self.__tr_combination_bias = tf.get_variable(
                "combination_bias_tr", initializer=tf.zeros([embed_dim]))
            self.__trainable.append(self.__hr_combination_bias)
            self.__trainable.append(self.__tr_combination_bias)
# DONOTCHANGE: They are reserved for nsml
args.add_argument('--mode', type=str, default='train',
                  help='Set to "test" when submitting.')
args.add_argument('--iteration', type=str, default='0',
                  help='Set to the checkpoint given to the fork command; without a '
                       'checkpoint option, the model from the last wall time is used.')
args.add_argument('--pause', type=int, default=0,
                  help='Set to 1 when the model is loaded.')

config = args.parse_args()

with tf.Graph().as_default():
    # placeholder is used for feeding data.
    x = tf.placeholder("float", shape=[None, FEATURE_DIM])  # None represents a dimension of variable length.
    y_target = tf.placeholder("float", shape=[None, OUTPUT_DIM])  # shape argument is optional, but useful for debugging.

    W1 = tf.Variable(tf.zeros([FEATURE_DIM, OUTPUT_DIM]))
    b1 = tf.Variable(tf.zeros([OUTPUT_DIM]))
    y = tf.matmul(x, W1) + b1

    delta = (y - y_target)
    L1_delta = delta[:, 0]
    L2_delta = delta[:, 1]
    MSE_loss = tf.reduce_mean(0.3 * L1_delta * L1_delta + 0.7 * L2_delta * L2_delta)

    train_step = tf.train.GradientDescentOptimizer(0.01).minimize(MSE_loss)

    sess = tf.Session()  # open a session, which is an environment of the computation graph.
    sess.run(tf.global_variables_initializer())  # initialize the variables

    # Bind model
    run_params = [y, x]
def __init__(self, root_embeddings: tf.Tensor, tree_def: TreeDefinition,
             target_trees: T.List[Tree] = None):
    super(self.__class__, self).__init__()

    self.is_supervised = target_trees is not None
    self.root_embeddings = root_embeddings
    self.target_trees = target_trees
    self.decoded_trees = []  # populated by the decoder
    self.tree_def = tree_def

    self.deferred_value_types = {}
    for nt in tree_def.node_types:
        if nt.value_type is not None:
            self.counters['vals_' + nt.id] = 1
            self.deferred_value_types[nt.id] = self.BuildDeferredRepresentationValueType(nt)

    # supervised call
    if self.is_supervised:
        self.depths = {'embs': []}

        def init(n: Tree, depth=0):
            nonlocal self
            n.meta['dec_batch'] = self
            n.meta['node_numb'] = self.counters['embs']
            self.counters['embs'] += 1
            self.depths['embs'].append(depth)

            if n.value is not None:
                k = 'vals_' + n.node_type_id
                n.meta['value_numb'] = self.counters[k]
                if k not in self.depths.keys():
                    self.depths[k] = []
                self.depths[k].append(depth)
                self.counters[k] += 1

        self.map_to_all_nodes(init, target_trees)

        self['embs'] = tf.zeros([self.counters['embs'] + 1, root_embeddings.shape[1]])

        # save the initial embeddings in the store so we can easily gather them afterwards
        self.scatter_update('embs', [t.meta['node_numb'] for t in target_trees], root_embeddings)

        _depths = {k: tf.convert_to_tensor(self.depths[k], dtype=tf.float32)
                   for k in self.depths.keys()}
        self.depths = _depths
        self.max_depths = {k: tf.reduce_max(self.depths[k]) for k in self.depths.keys()}

        scale_start, scale_end, scale_exp, max_depth = 1.0, 0.0, 2.0, 70
        self.depth_scale_coefs = {
            k: scale_start + (scale_end - scale_start) * (self.depths[k] / max_depth) ** scale_exp
            for k in self.depths.keys()
        }

        # No need to index these; they are never gathered nor rewritten - they are
        # only used all together for the loss, and thus incrementally stacked up
        # into a constant. The root distribution is saved with some zero padding.
        # Moreover, some entries are not associated with a real node: they are
        # artificial nodes used to train the model to -not- generate a node
        # (special no-child/no-node).
        self.distribs_unscaled = []
        self.distribs_idx = []

        for nt in tree_def.node_types:
            if nt.value_type is not None:
                self['vals_' + nt.id] = tf.zeros([self.counters['vals_' + nt.id] + 1,
                                                  nt.value_type.representation_shape])

    # unsupervised call
    else:
        self['embs'] = tf.zeros([1, root_embeddings.shape[1]])
        for nt in tree_def.node_types:
            if nt.value_type is not None:
                self['vals_' + nt.id] = tf.zeros([1, nt.value_type.representation_shape])
# -*- coding: utf-8 -*-
import tensorflow as tf

sess = tf.Session()

x = tf.placeholder("float", shape=[None, 10], name="input")
W = tf.Variable(tf.truncated_normal([10, 5], stddev=0.05))
b = tf.Variable(tf.zeros([5]))
y = tf.nn.softmax(tf.matmul(x, W) + b, name="output")

sess.run(tf.initialize_all_variables())
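# Usage sketch (hypothetical): run the softmax output above on a random batch.
# numpy is assumed imported as np; the batch values are illustrative.
import numpy as np
batch = np.random.rand(4, 10).astype(np.float32)
print(sess.run(y, feed_dict={x: batch}))  # 4 rows of 5 class probabilities each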
def main(argv=None):  # pylint: disable=unused-argument
    data_dir = './training/training/'
    train_data_filename = data_dir + 'images/'
    train_labels_filename = data_dir + 'groundtruth/'

    # Extract it into numpy arrays.
    train_data = extract_data(train_data_filename, TRAINING_SIZE)
    train_labels = extract_labels(train_labels_filename, TRAINING_SIZE)

    num_epochs = NUM_EPOCHS

    c0 = 0
    c1 = 0
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

    print('Balancing training data...')
    min_c = min(c0, c1)
    idx0 = [i for i, j in enumerate(train_labels) if j[0] == 1]
    idx1 = [i for i, j in enumerate(train_labels) if j[1] == 1]
    new_indices = idx0[0:min_c] + idx1[0:min_c]
    print(len(new_indices))
    print(train_data.shape)
    train_data = train_data[new_indices, :, :, :]
    train_labels = train_labels[new_indices]

    train_size = train_labels.shape[0]

    c0 = 0
    c1 = 0
    for i in range(len(train_labels)):
        if train_labels[i][0] == 1:
            c0 = c0 + 1
        else:
            c1 = c1 + 1
    print('Number of data points per class: c0 = ' + str(c0) + ' c1 = ' + str(c1))

    # This is where training samples and labels are fed to the graph.
    # These placeholder nodes will be fed a batch of training data at each
    # training step using the {feed_dict} argument to the Run() call below.
    train_data_node = tf.placeholder(
        tf.float32,
        shape=(BATCH_SIZE, IMG_PATCH_SIZE, IMG_PATCH_SIZE, NUM_CHANNELS))
    train_labels_node = tf.placeholder(tf.float32,
                                       shape=(BATCH_SIZE, NUM_LABELS))
    train_all_data_node = tf.constant(train_data)

    # The variables below hold all the trainable weights. They are passed an
    # initial value which will be assigned when we call:
    # {tf.initialize_all_variables().run()}
    conv1_weights = tf.Variable(
        tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
                            stddev=0.1,
                            seed=SEED))
    conv1_biases = tf.Variable(tf.zeros([32]))
    conv2_weights = tf.Variable(
        tf.truncated_normal([5, 5, 32, 64],
                            stddev=0.1,
                            seed=SEED))
    conv2_biases = tf.Variable(tf.constant(0.1, shape=[64]))
    fc1_weights = tf.Variable(  # fully connected, depth 512.
        tf.truncated_normal([int(IMG_PATCH_SIZE / 4 * IMG_PATCH_SIZE / 4 * 64), 512],
                            stddev=0.1,
                            seed=SEED))
    fc1_biases = tf.Variable(tf.constant(0.1, shape=[512]))
    fc2_weights = tf.Variable(
        tf.truncated_normal([512, NUM_LABELS],
                            stddev=0.1,
                            seed=SEED))
    fc2_biases = tf.Variable(tf.constant(0.1, shape=[NUM_LABELS]))

    # Make an image summary for 4d tensor image with index idx
    def get_image_summary(img, idx=0):
        V = tf.slice(img, (0, 0, 0, idx), (1, -1, -1, 1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        min_value = tf.reduce_min(V)
        V = V - min_value
        max_value = tf.reduce_max(V)
        V = V / (max_value * PIXEL_DEPTH)
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Make an image summary for 3d tensor image with index idx
    def get_image_summary_3d(img):
        V = tf.slice(img, (0, 0, 0), (1, -1, -1))
        img_w = img.get_shape().as_list()[1]
        img_h = img.get_shape().as_list()[2]
        V = tf.reshape(V, (img_w, img_h, 1))
        V = tf.transpose(V, (2, 0, 1))
        V = tf.reshape(V, (-1, img_w, img_h, 1))
        return V

    # Get prediction for given input image
    def get_prediction(img):
        data = numpy.asarray(img_crop(img, IMG_PATCH_SIZE, IMG_PATCH_SIZE))
        data_node = tf.constant(data)
        output = tf.nn.softmax(model(data_node))
        output_prediction = s.run(output)
        img_prediction = label_to_img(img.shape[0], img.shape[1],
                                      IMG_PATCH_SIZE, IMG_PATCH_SIZE,
                                      output_prediction)
        return img_prediction

    # Get a concatenation of the prediction and groundtruth for given input file
    def get_prediction_with_groundtruth(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        cimg = concatenate_images(img, img_prediction)

        return cimg

    # Get prediction overlaid on the original image for given input file
    def get_prediction_with_overlay(filename, image_idx):
        imageid = "satImage_%.3d" % image_idx
        image_filename = filename + imageid + ".png"
        img = mpimg.imread(image_filename)

        img_prediction = get_prediction(img)
        oimg = make_img_overlay(img, img_prediction)

        return oimg

    # We will replicate the model structure for the training subgraph, as well
    # as the evaluation subgraphs, while sharing the trainable parameters.
    def model(data, train=False):
        """The Model definition."""
        # 2D convolution, with 'SAME' padding (i.e. the output feature map has
        # the same size as the input). Note that {strides} is a 4D array whose
        # shape matches the data layout: [image index, y, x, depth].
        conv = tf.nn.conv2d(data,
                            conv1_weights,
                            strides=[1, 1, 1, 1],
                            padding='SAME')
        # Bias and rectified linear non-linearity.
        relu = tf.nn.relu(tf.nn.bias_add(conv, conv1_biases))
        # Max pooling. The kernel size spec {ksize} also follows the layout of
        # the data. Here we have a pooling window of 2, and a stride of 2.
        pool = tf.nn.max_pool(relu,
                              ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1],
                              padding='SAME')

        conv2 = tf.nn.conv2d(pool,
                             conv2_weights,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        relu2 = tf.nn.relu(tf.nn.bias_add(conv2, conv2_biases))
        pool2 = tf.nn.max_pool(relu2,
                               ksize=[1, 2, 2, 1],
                               strides=[1, 2, 2, 1],
                               padding='SAME')

        # Uncomment these lines to check the size of each layer
        # print 'data ' + str(data.get_shape())
        # print 'conv ' + str(conv.get_shape())
        # print 'relu ' + str(relu.get_shape())
        # print 'pool ' + str(pool.get_shape())
        # print 'pool2 ' + str(pool2.get_shape())

        # Reshape the feature map cuboid into a 2D matrix to feed it to the
        # fully connected layers.
        pool_shape = pool2.get_shape().as_list()
        reshape = tf.reshape(
            pool2,
            [pool_shape[0], pool_shape[1] * pool_shape[2] * pool_shape[3]])
        # Fully connected layer. Note that the '+' operation automatically
        # broadcasts the biases.
        hidden = tf.nn.relu(tf.matmul(reshape, fc1_weights) + fc1_biases)
        # Add a 50% dropout during training only. Dropout also scales
        # activations such that no rescaling is needed at evaluation time.
        # if train:
        #     hidden = tf.nn.dropout(hidden, 0.5, seed=SEED)
        out = tf.matmul(hidden, fc2_weights) + fc2_biases

        if train:
            summary_id = '_0'
            s_data = get_image_summary(data)
            filter_summary0 = tf.summary.image('summary_data' + summary_id, s_data)
            s_conv = get_image_summary(conv)
            filter_summary2 = tf.summary.image('summary_conv' + summary_id, s_conv)
            s_pool = get_image_summary(pool)
            filter_summary3 = tf.summary.image('summary_pool' + summary_id, s_pool)
            s_conv2 = get_image_summary(conv2)
            filter_summary4 = tf.summary.image('summary_conv2' + summary_id, s_conv2)
            s_pool2 = get_image_summary(pool2)
            filter_summary5 = tf.summary.image('summary_pool2' + summary_id, s_pool2)

        return out

    # Training computation: logits + cross-entropy loss.
    logits = model(train_data_node, True)  # BATCH_SIZE*NUM_LABELS
    # print 'logits = ' + str(logits.get_shape()) + ' train_labels_node = ' + str(train_labels_node.get_shape())
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=train_labels_node))
    tf.summary.scalar('loss', loss)

    all_params_node = [conv1_weights, conv1_biases, conv2_weights, conv2_biases,
                       fc1_weights, fc1_biases, fc2_weights, fc2_biases]
    all_params_names = ['conv1_weights', 'conv1_biases', 'conv2_weights',
                        'conv2_biases', 'fc1_weights', 'fc1_biases',
                        'fc2_weights', 'fc2_biases']
    all_grads_node = tf.gradients(loss, all_params_node)
    all_grad_norms_node = []
    for i in range(0, len(all_grads_node)):
        norm_grad_i = tf.global_norm([all_grads_node[i]])
        all_grad_norms_node.append(norm_grad_i)
        tf.summary.scalar(all_params_names[i], norm_grad_i)

    # L2 regularization for the fully connected parameters.
    regularizers = (tf.nn.l2_loss(fc1_weights) + tf.nn.l2_loss(fc1_biases) +
                    tf.nn.l2_loss(fc2_weights) + tf.nn.l2_loss(fc2_biases))
    # Add the regularization term to the loss.
    loss += 5e-4 * regularizers

    # Optimizer: set up a variable that's incremented once per batch and
    # controls the learning rate decay.
    batch = tf.Variable(0)
    # Decay once per epoch, using an exponential schedule starting at 0.01.
    learning_rate = tf.train.exponential_decay(
        0.01,                # Base learning rate.
        batch * BATCH_SIZE,  # Current index into the dataset.
        train_size,          # Decay step.
        0.95,                # Decay rate.
        staircase=True)
    tf.summary.scalar('learning_rate', learning_rate)

    # Use simple momentum for the optimization.
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.0).minimize(
        loss, global_step=batch)

    # Predictions for the minibatch, validation set and test set.
    train_prediction = tf.nn.softmax(logits)
    # We'll compute them only once in a while by calling their {eval()} method.
    train_all_prediction = tf.nn.softmax(model(train_all_data_node))

    # Add ops to save and restore all the variables.
    saver = tf.train.Saver()

    # Create a local session to run this computation.
    with tf.Session() as s:
        if RESTORE_MODEL:
            # Restore variables from disk.
            saver.restore(s, FLAGS.train_dir + "/model.ckpt")
            print("Model restored.")
        else:
            # Run all the initializers to prepare the trainable parameters.
            tf.initialize_all_variables().run()

            # Build the summary operation based on the TF collection of Summaries.
            summary_op = tf.summary.merge_all()
            summary_writer = tf.summary.FileWriter(FLAGS.train_dir,
                                                   graph_def=s.graph_def)
            print('Initialized!')
            # Loop through training steps.
            print('Total number of iterations = ' +
                  str(int(num_epochs * train_size / BATCH_SIZE)))

            training_indices = range(train_size)

            for iepoch in range(num_epochs):
                # Permute training indices
                perm_indices = numpy.random.permutation(training_indices)

                for step in range(int(train_size / BATCH_SIZE)):
                    # Compute the offset of the current minibatch in the data.
                    # Note that we could use better randomization across epochs.
                    offset = (step * BATCH_SIZE) % (train_size - BATCH_SIZE)
                    batch_indices = perm_indices[offset:(offset + BATCH_SIZE)]

                    batch_data = train_data[batch_indices, :, :, :]
                    batch_labels = train_labels[batch_indices]
                    # This dictionary maps the batch data (as a numpy array) to the
                    # node in the graph it should be fed to.
                    feed_dict = {train_data_node: batch_data,
                                 train_labels_node: batch_labels}

                    if step % RECORDING_STEP == 0:
                        summary_str, _, l, lr, predictions = s.run(
                            [summary_op, optimizer, loss, learning_rate, train_prediction],
                            feed_dict=feed_dict)
                        # summary_str = s.run(summary_op, feed_dict=feed_dict)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                        # print_predictions(predictions, batch_labels)

                        print('Epoch %.2f' % (float(step) * BATCH_SIZE / train_size))
                        print('Minibatch loss: %.3f, learning rate: %.6f' % (l, lr))
                        print('Minibatch error: %.1f%%' %
                              error_rate(predictions, batch_labels))

                        sys.stdout.flush()
                    else:
                        # Run the graph and fetch some of the nodes.
                        _, l, lr, predictions = s.run(
                            [optimizer, loss, learning_rate, train_prediction],
                            feed_dict=feed_dict)

                # Save the variables to disk.
                save_path = saver.save(s, FLAGS.train_dir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

        print("Running prediction on training set")
        prediction_training_dir = "predictions_training/"
        if not os.path.isdir(prediction_training_dir):
            os.mkdir(prediction_training_dir)
        for i in range(1, TRAINING_SIZE + 1):
            pimg = get_prediction_with_groundtruth(train_data_filename, i)
            Image.fromarray(pimg).save(prediction_training_dir + "prediction_" + str(i) + ".png")
            oimg = get_prediction_with_overlay(train_data_filename, i)
            oimg.save(prediction_training_dir + "overlay_" + str(i) + ".png")
mnist = input_data.read_data_sets('mnist_data', one_hot=True)

# size of each batch
batch_size = 100
# total number of batches
n_batch = mnist.train.num_examples // batch_size

# define the placeholders
x = tf.placeholder(tf.float32, [None, 784])
y = tf.placeholder(tf.float32, [None, 10])
lr = tf.Variable(0.001, dtype=tf.float32)
keep_prob = tf.placeholder(tf.float32)

# define the network
W1 = tf.Variable(tf.truncated_normal([784, 500], stddev=0.1))
b1 = tf.Variable(tf.zeros([500]) + 0.1)
L1 = tf.nn.tanh(tf.matmul(x, W1) + b1)
L1_drop = tf.nn.dropout(L1, keep_prob)

W2 = tf.Variable(tf.truncated_normal([500, 300], stddev=0.1))
b2 = tf.Variable(tf.zeros([300]) + 0.1)
L2 = tf.nn.tanh(tf.matmul(L1_drop, W2) + b2)
L2_drop = tf.nn.dropout(L2, keep_prob)

W3 = tf.Variable(tf.truncated_normal([300, 100], stddev=0.1))
b3 = tf.Variable(tf.zeros([100]) + 0.1)
L3 = tf.nn.tanh(tf.matmul(L2_drop, W3) + b3)
L3_drop = tf.nn.dropout(L3, keep_prob)

W4 = tf.Variable(tf.truncated_normal([100, 10], stddev=0.1))
b4 = tf.Variable(tf.zeros([10]) + 0.1)
def main():
    image_lists = create_image_lists(TEST_PERCENTAGE, VALIDATION_PERCENTAGE)
    n_classes = len(image_lists.keys())
    # Load the pre-trained Inception-v3 model. Google's trained model is saved
    # in a GraphDef protocol buffer, which stores the computation for every
    # node as well as the values of its variables.
    # TensorFlow model persistence is discussed in detail in Chapter 5.
    with gfile.FastGFile(os.path.join(MODEL_DIR, MODEL_FILE), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import the Inception-v3 graph and get back the tensor that receives the
    # input data and the tensor that holds the bottleneck-layer output.
    bottleneck_tensor, jpeg_data_tensor = tf.import_graph_def(
        graph_def,
        return_elements=[BOTTLENECK_TENSOR_NAME, JPEG_DATA_TENSOR_NAME])

    # Define the new network input: the bottleneck value obtained by
    # forward-propagating a new image through Inception-v3. This can be
    # thought of as a form of feature extraction.
    bottleneck_input = tf.placeholder(tf.float32,
                                      [None, BOTTLENECK_TENSOR_SIZE],
                                      name='BottleneckInputPlaceholder')
    # Define the new ground-truth input.
    ground_truth_input = tf.placeholder(tf.float32, [None, n_classes],
                                        name='GroundTruthInput')

    # Define a single fully connected layer to solve the new classification
    # problem. The pre-trained Inception-v3 model has already abstracted the
    # raw images into feature vectors that are easy to classify, so there is
    # no need to train another deep network for the new task.
    with tf.name_scope('final_training_ops'):
        weights = tf.Variable(
            tf.truncated_normal([BOTTLENECK_TENSOR_SIZE, n_classes],
                                stddev=0.001))
        biases = tf.Variable(tf.zeros([n_classes]))
        logits = tf.matmul(bottleneck_input, weights) + biases
        final_tensor = tf.nn.softmax(logits)

    # Define the cross-entropy loss.
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits=logits, labels=ground_truth_input)
    cross_entropy_mean = tf.reduce_mean(cross_entropy)
    train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
        cross_entropy_mean)

    # Compute the accuracy.
    with tf.name_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(final_tensor, 1),
                                      tf.argmax(ground_truth_input, 1))
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        # Training loop.
        for i in range(STEPS):
            # Fetch one batch of training data per step.
            train_bottlenecks, train_ground_truth = get_random_cached_bottlenecks(
                sess, n_classes, image_lists, BATCH, 'training',
                jpeg_data_tensor, bottleneck_tensor)
            sess.run(train_step,
                     feed_dict={bottleneck_input: train_bottlenecks,
                                ground_truth_input: train_ground_truth})
            # Test the accuracy on the validation data.
            if i % 100 == 0 or i + 1 == STEPS:
                validation_bottlenecks, validation_ground_truth = \
                    get_random_cached_bottlenecks(
                        sess, n_classes, image_lists, BATCH, 'validation',
                        jpeg_data_tensor, bottleneck_tensor)
                validation_accuracy = sess.run(
                    evaluation_step,
                    feed_dict={bottleneck_input: validation_bottlenecks,
                               ground_truth_input: validation_ground_truth})
                print('Step %d: Validation accuracy on random sampled %d '
                      'examples = %.1f%%' %
                      (i, BATCH, validation_accuracy * 100))
        # Test the accuracy on the final test data.
        test_bottlenecks, test_ground_truth = get_test_bottlenecks(
            sess, image_lists, n_classes, jpeg_data_tensor, bottleneck_tensor)
        test_accuracy = sess.run(
            evaluation_step,
            feed_dict={bottleneck_input: test_bottlenecks,
                       ground_truth_input: test_ground_truth})
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100))
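# For context, a minimal sketch of how a single bottleneck vector might be
# computed from raw JPEG bytes with the two imported tensors above. The helper
# name `run_bottleneck_on_image` is an assumption (the original relies on
# get_random_cached_bottlenecks()/get_test_bottlenecks(), whose definitions
# are not shown here), and numpy is assumed to be imported as np.
def run_bottleneck_on_image(sess, image_path, jpeg_data_tensor,
                            bottleneck_tensor):
    # Read raw JPEG bytes and forward-propagate them to the bottleneck layer.
    image_data = gfile.FastGFile(image_path, 'rb').read()
    bottleneck_values = sess.run(bottleneck_tensor,
                                 feed_dict={jpeg_data_tensor: image_data})
    # Squeeze the [1, BOTTLENECK_TENSOR_SIZE] result into a flat vector.
    return np.squeeze(bottleneck_values)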
def initial_cell_state(self, batch: int) -> Tuple[tf.Tensor, ...]:
    if self.use_rnn:
        return tuple(
            tf.zeros((batch, self.h_dim), dtype=tf.float32)
            for _ in range(self.cell_nums))
    return (None, )
graph = tf.Graph()
with graph.as_default():
    # Input data.
    train_dataset = tf.placeholder(tf.int32, shape=[batch_size])
    train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
    valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

    # Variables.
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    softmax_weights = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size],
                            stddev=1.0 / math.sqrt(embedding_size)))
    softmax_biases = tf.Variable(tf.zeros([vocabulary_size]),
                                 dtype=tf.float32)

    # Model.
    # Look up embeddings for inputs.
    embed = tf.nn.embedding_lookup(embeddings, train_dataset)
    # Compute the softmax loss, using a sample of the negative labels each time.
    loss = tf.reduce_mean(
        tf.nn.sampled_softmax_loss(softmax_weights, softmax_biases, embed,
                                   train_labels, num_sampled, vocabulary_size))

    # Optimizer.
    # Note: The optimizer will optimize the softmax_weights AND the embeddings.
    # This is because the embeddings are defined as a variable quantity and the
    # optimizer's `minimize` method will by default modify all variable
    # quantities that contribute to the tensor it is passed.
    # See docs on `tf.train.Optimizer.minimize()` for more details.
    optimizer = tf.train.AdagradOptimizer(1.0).minimize(loss)
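    # `valid_dataset` is defined above but never used in this excerpt. A
    # common way to use it (a sketch, not part of the original) is to compute
    # cosine similarities between the validation words and the whole
    # vocabulary, e.g. to print nearest neighbours during training:
    norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
    normalized_embeddings = embeddings / norm
    valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                              valid_dataset)
    # Cosine similarity of each validation word against every embedding.
    similarity = tf.matmul(valid_embeddings,
                           tf.transpose(normalized_embeddings))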
def word2vec(dataset):
    """ Build the graph for the word2vec model and train it """
    # Step 1: create the iterator and get input/output from the dataset.
    iterator = dataset.make_initializable_iterator()
    center_words, target_words = iterator.get_next()

    #############################
    ########## TO DO ############
    #############################

    # Step 2: define weights.
    # In word2vec, it's the weights that we care about.
    embed_matrix = tf.get_variable(
        'embed_matrix',
        shape=[VOCAB_SIZE, EMBED_SIZE],
        initializer=tf.random_uniform_initializer())

    #############################
    ########## TO DO ############
    #############################

    # Step 3: define the inference (embedding lookup).
    embed = tf.nn.embedding_lookup(embed_matrix, center_words, name='embed')

    #############################
    ########## TO DO ############
    #############################

    # Step 4: define the loss function.
    # Construct variables for the NCE loss.
    nce_weight = tf.get_variable(
        'nce_weight',
        shape=[VOCAB_SIZE, EMBED_SIZE],
        initializer=tf.truncated_normal_initializer(
            stddev=1.0 / (EMBED_SIZE ** 0.5)))
    nce_bias = tf.get_variable('nce_bias', initializer=tf.zeros([VOCAB_SIZE]))

    #############################
    ########## TO DO ############
    #############################

    # Define the loss function to be the NCE loss function.
    loss = tf.reduce_mean(
        tf.nn.nce_loss(weights=nce_weight,
                       biases=nce_bias,
                       labels=target_words,
                       inputs=embed,
                       num_sampled=NUM_SAMPLED,
                       num_classes=VOCAB_SIZE),
        name='loss')

    #############################
    ########## TO DO ############
    #############################

    # Step 5: define an optimizer that follows the gradient descent update
    # rule to minimize the loss.
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    #############################
    ########## TO DO ############
    #############################

    utils.safe_mkdir('checkpoints')

    with tf.Session() as sess:
        # Step 6: initialize the iterator and the variables.
        sess.run(iterator.initializer)
        sess.run(tf.global_variables_initializer())

        #############################
        ########## TO DO ############
        #############################

        total_loss = 0.0  # used to calculate the average loss over the last SKIP_STEP steps
        writer = tf.summary.FileWriter('graphs/word2vec_simple', sess.graph)

        for index in range(NUM_TRAIN_STEPS):
            try:
                # Step 7: execute the optimizer and fetch the loss.
                loss_batch, _ = sess.run([loss, optimizer])

                #############################
                ########## TO DO ############
                #############################

                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
            except tf.errors.OutOfRangeError:
                sess.run(iterator.initializer)
        writer.close()
def init_bias(self, dim_out, name=None): return tf.Variable(tf.zeros([dim_out]), name=name)
    net = tf_util.max_pool2d(net, [num_point, 1],
                             padding='VALID', scope='maxpool')

    # MLP on global point cloud vector
    net = tf.reshape(net, [batch_size, -1])
    net = tf_util.fully_connected(net, 512, bn=True, is_training=is_training,
                                  scope='fc1', bn_decay=bn_decay)
    net = tf_util.fully_connected(net, 256, bn=True, is_training=is_training,
                                  scope='fc2', bn_decay=bn_decay)
    net = tf_util.dropout(net, keep_prob=0.7, is_training=is_training,
                          scope='dp1')
    net = tf_util.fully_connected(net, 40, activation_fn=None, scope='fc3')

    return net, end_points


def get_loss(pred, label, end_points):
    """ pred: B*NUM_CLASSES,
        label: B, """
    loss = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred,
                                                          labels=label)
    classify_loss = tf.reduce_mean(loss)
    tf.summary.scalar('classify loss', classify_loss)
    return classify_loss


if __name__ == '__main__':
    with tf.Graph().as_default():
        inputs = tf.zeros((32, 1024, 3))
        outputs = get_model(inputs, tf.constant(True))
        print(outputs)
def construct_sampling_ops(model):
    """Builds a graph fragment for sampling over a RNNModel.

    Args:
        model: a RNNModel.

    Returns:
        A Tensor with shape (max_seq_len, batch_size) containing one sampled
        translation for each input sentence in model.inputs.x.
    """
    decoder = model.decoder
    batch_size = tf.shape(decoder.init_state)[0]
    high_depth = 0 if decoder.high_gru_stack is None \
                   else len(decoder.high_gru_stack.grus)
    i = tf.constant(0)
    init_y = -tf.ones(dtype=tf.int32, shape=[batch_size])
    init_emb = tf.zeros(dtype=tf.float32,
                        shape=[batch_size, decoder.embedding_size])
    y_array = tf.TensorArray(
        dtype=tf.int32,
        size=decoder.translation_maxlen,
        clear_after_read=True,  # TODO: does this help? or will it only introduce bugs in the future?
        name='y_sampled_array')
    init_loop_vars = [i, decoder.init_state,
                      [decoder.init_state] * high_depth, init_y, init_emb,
                      y_array]

    def cond(i, base_state, high_states, prev_y, prev_emb, y_array):
        return tf.logical_and(tf.less(i, decoder.translation_maxlen),
                              tf.reduce_any(tf.not_equal(prev_y, 0)))

    def body(i, prev_base_state, prev_high_states, prev_y, prev_emb, y_array):
        state1 = decoder.grustep1.forward(prev_base_state, prev_emb)
        att_ctx = decoder.attstep.forward(state1)
        base_state = decoder.grustep2.forward(state1, att_ctx)
        if decoder.high_gru_stack is None:
            output = base_state
            high_states = []
        else:
            if decoder.high_gru_stack.context_state_size == 0:
                output, high_states = decoder.high_gru_stack.forward_single(
                    prev_high_states, base_state)
            else:
                output, high_states = decoder.high_gru_stack.forward_single(
                    prev_high_states, base_state, context=att_ctx)
        logits = decoder.predictor.get_logits(prev_emb, output, att_ctx,
                                              multi_step=False)
        new_y = tf.multinomial(logits, num_samples=1)
        new_y = tf.cast(new_y, dtype=tf.int32)
        new_y = tf.squeeze(new_y, axis=1)
        # Once a sentence has produced <eos> (id 0), keep emitting zeros.
        new_y = tf.where(tf.equal(prev_y, tf.constant(0, dtype=tf.int32)),
                         tf.zeros_like(new_y), new_y)
        y_array = y_array.write(index=i, value=new_y)
        new_emb = decoder.y_emb_layer.forward(new_y, factor=0)
        return i + 1, base_state, high_states, new_y, new_emb, y_array

    final_loop_vars = tf.while_loop(cond=cond,
                                    body=body,
                                    loop_vars=init_loop_vars,
                                    back_prop=False)
    i, _, _, _, _, y_array = final_loop_vars
    sampled_ys = y_array.gather(tf.range(0, i))
    return sampled_ys
def build_model(self):
    context = tf.placeholder("float32",
                             [self.batch_size, self.ctx_shape[0],
                              self.ctx_shape[1]])
    sentence = tf.placeholder("int32", [self.batch_size, self.n_lstm_steps])
    mask = tf.placeholder("float32", [self.batch_size, self.n_lstm_steps])

    h, c = self.get_initial_lstm(tf.reduce_mean(context, 1))

    # TensorFlow cannot compute dot(3D tensor, matrix) directly, so flatten
    # the context to 2D first.
    context_flat = tf.reshape(context, [-1, self.dim_ctx])
    context_encode = tf.matmul(context_flat, self.image_att_W)
    context_encode = tf.reshape(
        context_encode, [-1, ctx_shape[0], ctx_shape[1]])  # (batch_size, 196, 512)

    loss = 0.0
    for ind in range(self.n_lstm_steps):
        if ind == 0:
            word_emb = tf.zeros([self.batch_size, self.dim_embed])
        else:
            tf.get_variable_scope().reuse_variables()
            with tf.device("/cpu:0"):
                word_emb = tf.nn.embedding_lookup(self.Wemb,
                                                  sentence[:, ind - 1])

        x_t = tf.matmul(word_emb, self.lstm_W) + self.lstm_b  # (batch_size, hidden*4)

        labels = tf.expand_dims(sentence[:, ind], 1)
        indices = tf.expand_dims(tf.range(0, self.batch_size, 1), 1)
        concated = tf.concat(1, [indices, labels])
        onehot_labels = tf.sparse_to_dense(
            concated, tf.pack([self.batch_size, self.n_words]), 1.0, 0.0)

        context_encode = context_encode + \
            tf.expand_dims(tf.matmul(h, self.hidden_att_W), 1) + \
            self.pre_att_b
        context_encode = tf.nn.tanh(context_encode)

        # context_encode is 3D here as well, so flatten before the matmul.
        context_encode_flat = tf.reshape(
            context_encode, [-1, self.dim_ctx])  # (batch_size*196, 512)
        alpha = tf.matmul(context_encode_flat, self.att_W) + self.att_b  # (batch_size*196, 1)
        alpha = tf.reshape(alpha, [-1, self.ctx_shape[0]])
        alpha = tf.nn.softmax(alpha)

        weighted_context = tf.reduce_sum(
            context * tf.expand_dims(alpha, 2), 1)

        lstm_preactive = tf.matmul(h, self.lstm_U) + x_t + \
            tf.matmul(weighted_context, self.image_encode_W)

        i, f, o, new_c = tf.split(1, 4, lstm_preactive)
        i = tf.nn.sigmoid(i)
        f = tf.nn.sigmoid(f)
        o = tf.nn.sigmoid(o)
        new_c = tf.nn.tanh(new_c)

        c = f * c + i * new_c
        h = o * tf.nn.tanh(new_c)

        logits = tf.matmul(h, self.decode_lstm_W) + self.decode_lstm_b
        logits = tf.nn.relu(logits)
        logits = tf.nn.dropout(logits, 0.5)

        logit_words = tf.matmul(logits, self.decode_word_W) + self.decode_word_b
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits(logit_words,
                                                                onehot_labels)
        cross_entropy = cross_entropy * mask[:, ind]

        current_loss = tf.reduce_sum(cross_entropy)
        loss = loss + current_loss

    loss = loss / tf.reduce_sum(mask)
    return loss, context, sentence, mask
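# Aside: the sparse_to_dense/pack construction above builds the one-hot labels
# by hand, matching the old TF API used throughout this snippet (tf.pack,
# tf.concat(axis, values), tf.split(axis, num, value)). On later TF 1.x
# releases the same labels could be produced in a single call (a sketch, not
# part of the original):
#
#     onehot_labels = tf.one_hot(sentence[:, ind], depth=self.n_words)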
    _, csv_row = reader.read(filename_queue)
    record_defaults = [[1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]
    col1, col2, col3, col4, col5, col6, col7, col8, col9, col10, col11 = \
        tf.decode_csv(csv_row, record_defaults=record_defaults)
    features = tf.stack([col1, col2, col3, col4, col5, col6, col7, col8, col9])
    label = tf.stack([col10, col11])
    return features, label

filename_queue = tf.train.string_input_producer(["COMBINED_DATA.csv"])
features, labels = create_file_reader_ops(filename_queue)

x = tf.placeholder(tf.float32, shape=[None, 9])
y_ = tf.placeholder(tf.float32, shape=[None, 2])
W = tf.Variable(tf.zeros([9, 2]))
b = tf.Variable(tf.zeros([2]))
y = tf.matmul(x, W) + b

cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)
start = time.time()
for i in range(500):
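    # The loop body is cut off in the original. A plausible completion (an
    # assumption, not the original code): pull one example per iteration from
    # the queue-runner pipeline and take a gradient step on it.
    example, lbl = sess.run([features, labels])
    train_step.run(feed_dict={x: [example], y_: [lbl]}, session=sess)

# Shut the queue runners down cleanly.
coord.request_stop()
coord.join(threads)
print("500 steps took %.2f seconds" % (time.time() - start))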
def construct_beam_search_ops(models, beam_size):
    """Builds a graph fragment for beam search over one or more RNNModels.

    Strategy:
        compute the log_probs - same as with sampling
        for sentences that are ended set log_prob(<eos>)=0, log_prob(not eos)=-inf
        add previous cost to log_probs
        run top k -> (idxs, values)
        use values as new costs
        divide idxs by num_classes to get state_idxs
        use gather to get new states
        take the remainder of idxs after num_classes to get new_predicted words
    """
    # Get some parameter settings. For ensembling, some parameters are
    # required to be consistent across all models but others are not. In the
    # former case, we assume that consistency has already been checked. For
    # the parameters that are allowed to vary across models, the first model's
    # settings take precedence.
    decoder = models[0].decoder
    batch_size = tf.shape(decoder.init_state)[0]
    embedding_size = decoder.embedding_size
    translation_maxlen = decoder.translation_maxlen
    target_vocab_size = decoder.target_vocab_size
    high_depth = 0 if decoder.high_gru_stack is None \
                   else len(decoder.high_gru_stack.grus)

    # Initialize loop variables.
    i = tf.constant(0)
    init_ys = -tf.ones(dtype=tf.int32, shape=[batch_size])
    init_embs = [tf.zeros(dtype=tf.float32,
                          shape=[batch_size, embedding_size])] * len(models)
    f_min = numpy.finfo(numpy.float32).min
    # Force the first top-k to come from the first hypothesis only.
    init_cost = [0.] + [f_min] * (beam_size - 1)
    init_cost = tf.constant(init_cost, dtype=tf.float32)
    # Use integer division: tile's `multiples` must be integral.
    init_cost = tf.tile(init_cost, multiples=[batch_size // beam_size])
    ys_array = tf.TensorArray(dtype=tf.int32,
                              size=translation_maxlen,
                              clear_after_read=True,
                              name='y_sampled_array')
    p_array = tf.TensorArray(dtype=tf.int32,
                             size=translation_maxlen,
                             clear_after_read=True,
                             name='parent_idx_array')
    init_base_states = [m.decoder.init_state for m in models]
    init_high_states = [[m.decoder.init_state] * high_depth for m in models]
    init_loop_vars = [i, init_base_states, init_high_states, init_ys,
                      init_embs, init_cost, ys_array, p_array]

    # Prepare cost matrix for completed sentences -> Prob(EOS) = 1 and Prob(x) = 0
    eos_log_probs = tf.constant([[0.] + ([f_min] * (target_vocab_size - 1))],
                                dtype=tf.float32)
    eos_log_probs = tf.tile(eos_log_probs, multiples=[batch_size, 1])

    def cond(i, prev_base_states, prev_high_states, prev_ys, prev_embs, cost,
             ys_array, p_array):
        return tf.logical_and(tf.less(i, translation_maxlen),
                              tf.reduce_any(tf.not_equal(prev_ys, 0)))

    def body(i, prev_base_states, prev_high_states, prev_ys, prev_embs, cost,
             ys_array, p_array):
        # Get predictions from all models and sum the log probs.
        sum_log_probs = None
        base_states = [None] * len(models)
        high_states = [None] * len(models)
        for j in range(len(models)):
            d = models[j].decoder
            states1 = d.grustep1.forward(prev_base_states[j], prev_embs[j])
            att_ctx = d.attstep.forward(states1)
            base_states[j] = d.grustep2.forward(states1, att_ctx)
            if d.high_gru_stack is None:
                stack_output = base_states[j]
                high_states[j] = []
            else:
                if d.high_gru_stack.context_state_size == 0:
                    stack_output, high_states[j] = \
                        d.high_gru_stack.forward_single(
                            prev_high_states[j], base_states[j])
                else:
                    stack_output, high_states[j] = \
                        d.high_gru_stack.forward_single(
                            prev_high_states[j], base_states[j],
                            context=att_ctx)
            logits = d.predictor.get_logits(prev_embs[j], stack_output,
                                            att_ctx, multi_step=False)
            log_probs = tf.nn.log_softmax(logits)  # shape (batch, vocab_size)
            if sum_log_probs is None:
                sum_log_probs = log_probs
            else:
                sum_log_probs += log_probs

        # Set cost of EOS to zero for completed sentences so that they are in
        # the top k. Need to make sure only EOS is selected because a
        # completed sentence might kill ongoing sentences.
        sum_log_probs = tf.where(tf.equal(prev_ys, 0), eos_log_probs,
                                 sum_log_probs)

        all_costs = sum_log_probs + tf.expand_dims(cost, axis=1)
        # TODO: you might be getting NaNs here since -inf is in log_probs

        all_costs = tf.reshape(all_costs,
                               shape=[-1, target_vocab_size * beam_size])
        values, indices = tf.nn.top_k(all_costs, k=beam_size)
        # The `sorted` option is True by default; is this needed?
        new_cost = tf.reshape(values, shape=[batch_size])
        offsets = tf.range(start=0, delta=beam_size, limit=batch_size,
                           dtype=tf.int32)
        offsets = tf.expand_dims(offsets, axis=1)
        # Integer division recovers the beam entry each candidate came from;
        # the remainder is the predicted word id.
        survivor_idxs = (indices // target_vocab_size) + offsets
        new_ys = indices % target_vocab_size
        survivor_idxs = tf.reshape(survivor_idxs, shape=[batch_size])
        new_ys = tf.reshape(new_ys, shape=[batch_size])
        new_embs = [m.decoder.y_emb_layer.forward(new_ys, factor=0)
                    for m in models]
        new_base_states = [tf.gather(s, indices=survivor_idxs)
                           for s in base_states]
        new_high_states = [[tf.gather(s, indices=survivor_idxs)
                            for s in states] for states in high_states]
        new_cost = tf.where(tf.equal(new_ys, 0), tf.abs(new_cost), new_cost)

        ys_array = ys_array.write(i, value=new_ys)
        p_array = p_array.write(i, value=survivor_idxs)

        return (i + 1, new_base_states, new_high_states, new_ys, new_embs,
                new_cost, ys_array, p_array)

    final_loop_vars = tf.while_loop(cond=cond,
                                    body=body,
                                    loop_vars=init_loop_vars,
                                    back_prop=False)
    i, _, _, _, _, cost, ys_array, p_array = final_loop_vars
    indices = tf.range(0, i)
    sampled_ys = ys_array.gather(indices)
    parents = p_array.gather(indices)
    cost = tf.abs(cost)  # to get the negative log-likelihood
    return sampled_ys, parents, cost
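# A tiny numerical illustration (not part of the original) of the
# flatten + top-k + divmod trick described in the docstring above, with
# beam_size=2 and target_vocab_size=3. Each sentence owns one row of
# beam_size * vocab candidate costs; integer-dividing a flat index by the
# vocab size recovers the surviving beam entry, and the remainder recovers
# the predicted word id.
_costs = numpy.array([-1.0, -5.0, -2.0,   # candidates from beam entry 0
                      -3.0, -0.5, -4.0])  # candidates from beam entry 1
_top2 = numpy.argsort(_costs)[::-1][:2]   # indices of the 2 highest costs
print(_top2 // 3, _top2 % 3)              # -> [1 0] [1 0]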
def log_regression():
    learning_rate = 0.01
    batch_size = 1024
    iter_num = 30
    display_step = 1
    beta = 0.1
    threshold = 0.5
    seed = 5
    np.random.seed(seed)
    tf.set_random_seed(seed)

    print("Loading training data...")
    train_data = pd.read_csv(
        currentDirectory + "train\\" + "CombinedTrainData.csv", header=None)
    print("Shuffling training data...")
    train_data = train_data.iloc[np.random.permutation(len(train_data))]
    # Select the feature columns and the label column.
    train_X = np.array(train_data.iloc[:, 0:number_of_columns - 1])
    train_Y = np.array(train_data.iloc[:, number_of_columns - 1:])

    # print("Loading validation data...")
    # dev_data = pd.read_csv(currentDirectory + "dev\\" + "CombinedDevData.csv")
    # dev_X = dev_data.iloc[:, 0:number_of_columns - 1]
    # dev_Y = np.array(dev_data.iloc[:, number_of_columns - 1:])

    print("Loading test data...")
    test_data = pd.read_csv(currentDirectory + "test\\" + "CombinedTestData.csv")
    test_X = np.array(test_data.iloc[:, 0:number_of_columns - 1])
    test_Y = np.array(test_data.iloc[:, number_of_columns - 1:])

    # Define the model framework.
    w = tf.Variable(tf.zeros([44, 1]), name="weights")
    b = tf.Variable(tf.zeros([1]), name="bias")
    X = tf.placeholder(tf.float32, [None, 44], name='data')
    Y = tf.placeholder(tf.float32, [None, 1], name='target')
    logits = tf.matmul(X, w) + b
    pred = tf.sigmoid(logits)

    # Minimize error using cross entropy (1st cost function):
    # cost = tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=logits, labels=Y))
    # (2nd cost function used):
    # cost = tf.multiply(-1.0, tf.reduce_mean(tf.add(tf.multiply(beta, tf.multiply(Y, tf.log(pred))), tf.multiply(tf.subtract(1.0, Y), tf.log(tf.subtract(1.0, pred))))))
    # Note: weighted_cross_entropy_with_logits expects raw logits, not sigmoid
    # outputs, so we pass `logits` here (the original passed `pred`).
    cost = tf.reduce_mean(
        tf.nn.weighted_cross_entropy_with_logits(logits=logits, targets=Y,
                                                 pos_weight=beta))

    # To calculate precision, which is the main metric.
    rounded_pred = tf.cast(pred >= threshold, dtype=tf.float32)
    total_ones = tf.reduce_sum(rounded_pred)
    true_positive = tf.reduce_sum(tf.multiply(rounded_pred, Y))

    optimizer = tf.train.GradientDescentOptimizer(
        learning_rate=learning_rate).minimize(cost)
    # correct = tf.cast(tf.equal(tf.round(pred), Y), dtype=tf.float32)
    # accuracy = tf.metrics.precision(labels=Y, predictions=tf.round(pred))

    writer = tf.summary.FileWriter('./graphs', tf.get_default_graph())
    train_precision = []
    test_precision = []
    init = tf.global_variables_initializer()
    init_l = tf.local_variables_initializer()

    with tf.Session() as sess:
        sess.run(init)
        sess.run(init_l)
        for epoch in range(iter_num):
            avg_cost = 0.0
            tp = 0
            tp_fp = 0.000000000001  # to prevent division by 0
            test_tp = 0
            test_tp_fp = 0.000000000001  # to prevent division by 0
            total_batch = math.ceil(train_X.shape[0] / batch_size)
            for i in range(total_batch - 1):
                batch_xs = train_X[i * batch_size:i * batch_size + batch_size, :]
                batch_ys = train_Y[i * batch_size:i * batch_size + batch_size, :]
                test_xs = test_X[i * batch_size:i * batch_size + batch_size, :]
                test_ys = test_Y[i * batch_size:i * batch_size + batch_size, :]
                # Run the optimization op (backprop) and the cost op (to get
                # the loss value), and count the predicted/true positives.
                _, c, p, ones, correct_ones = sess.run(
                    [optimizer, cost, pred, total_ones, true_positive],
                    feed_dict={X: batch_xs, Y: batch_ys})
                test_ones, test_correct_ones = sess.run(
                    [total_ones, true_positive],
                    feed_dict={X: test_xs, Y: test_ys})
                avg_cost += c
                tp += correct_ones
                tp_fp += ones
                test_tp += test_correct_ones
                test_tp_fp += test_ones

            # Include the last (possibly smaller) batch.
            batch_xs = train_X[batch_size * (total_batch - 1):, :]
            batch_ys = train_Y[batch_size * (total_batch - 1):, :]
            _, c, p, ones, correct_ones = sess.run(
                [optimizer, cost, pred, total_ones, true_positive],
                feed_dict={X: batch_xs, Y: batch_ys})
            test_ones, test_correct_ones = sess.run(
                [total_ones, true_positive],
                feed_dict={X: test_xs, Y: test_ys})
            avg_cost += c
            tp += correct_ones
            tp_fp += ones
            test_tp += test_correct_ones
            test_tp_fp += test_ones

            print("Train Precision: ", float(tp) / float(tp_fp))
            print("Test Precision: ", float(test_tp) / float(test_tp_fp))
            # Display logs per epoch step.
            if (epoch + 1) % display_step == 0:
                print('epoch: {:4d} cost = {:.9f} train_precision: {:.9f} '
                      'test_precision: {:.9f}'.format(
                          epoch + 1, avg_cost,
                          float(tp) / float(tp_fp),
                          float(test_tp) / float(test_tp_fp)))
        print("Optimization Finished!")
        writer.close()
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets('MNIST_data', one_hot=True)

import tensorflow as tf
import numpy as np

sess = tf.InteractiveSession()

x = tf.placeholder(tf.float32, shape=[None, 784])
y_ = tf.placeholder(tf.float32, shape=[None, 10])
W = tf.Variable(tf.zeros([784, 10]))
b = tf.Variable(tf.zeros([10]))
sess.run(tf.global_variables_initializer())

y = tf.matmul(x, W) + b
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(logits=y, labels=y_))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)

for i in range(1000):
    batch = mnist.train.next_batch(100)
    train_step.run(feed_dict={x: batch[0], y_: batch[1]})
print("training complete")

correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
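# The snippet stops after defining the accuracy op. Evaluating it on the
# held-out test set is the natural next step (a sketch, in the same style):
print(accuracy.eval(feed_dict={x: mnist.test.images, y_: mnist.test.labels}))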
def _create_variables(self):
    self.global_step = tf.Variable(initial_value=0, trainable=False)
    # Halve the learning rate every `anneal_every` epochs. Note that
    # piecewise_constant expects exactly one more value than boundaries (the
    # original passed one value too many).
    boundaries = (np.arange(self.anneal_every, self.epoch, self.anneal_every)
                  * self.steps_per_epoch)
    learning_rates = self.lr / (2 ** np.arange(1, len(boundaries) + 1))
    self.learning_rate = tf.train.piecewise_constant(
        self.global_step,
        list(boundaries.astype(np.int32)),
        [self.lr] + list(learning_rates.astype(np.float32)))

    # Position-encoding matrix l_kj = (1 - j/J) - (k/D) * (1 - 2j/J), laid out
    # as (J, D): word positions j down the rows, embedding dims k across the
    # columns (the original built the transposed shape and then reshaped it).
    J, D = self.sentence_length, self.embedding_size
    j = np.expand_dims(np.linspace(1, J, J), 1)
    k = np.expand_dims(np.linspace(1, D, D), 0)
    l = (1 - j / J) - k / D * (1 - 2 * j / J)
    self.l_pe = tf.constant(l, tf.float32, shape=[J, D])

    with tf.variable_scope('input'):
        # sentences - stories - facts
        self.x = tf.placeholder(
            tf.int32, [None, self.memory_size, self.sentence_length],
            name='facts')
        self.q = tf.placeholder(tf.int32, [None, self.sentence_length],
                                name='query')
        self.a = tf.placeholder(tf.int32, [None], name='answer')

    self.embeddings = {}
    with tf.variable_scope('embeddings'):
        # The extra row (+1) is a fixed all-zero embedding for padding input.
        zero_embedding = tf.zeros([1, self.embedding_size])
        B = tf.concat(0, [
            zero_embedding,
            tf.get_variable('B', [self.vocab_size, self.embedding_size],
                            tf.float32),
        ])
        self.embeddings['B'] = B
        C_prev = B
        TC_prev = tf.get_variable('TB',
                                  [self.memory_size, self.embedding_size],
                                  tf.float32)
        for k in range(1, self.hops + 1):
            k = str(k)
            with tf.variable_scope('hop' + k):
                C = tf.concat(0, [
                    zero_embedding,
                    tf.get_variable('C',
                                    [self.vocab_size, self.embedding_size],
                                    tf.float32)
                ])
                TC = tf.get_variable('TC',
                                     [self.memory_size, self.embedding_size],
                                     tf.float32)
                self.embeddings['A' + k] = C_prev
                self.embeddings['TA' + k] = TC_prev
                self.embeddings['C' + k] = C
                self.embeddings['TC' + k] = TC
                C_prev = C
                TC_prev = TC
                tf.histogram_summary('A' + k, self.embeddings['A' + k])
                tf.histogram_summary('C' + k, C)
        self.embeddings['W'] = tf.transpose(
            self.embeddings['C' + str(self.hops)])
labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

# Variable that will hold the embedding vectors, the actual output of the
# word2vec model. It has two dimensions: total vocabulary size by embedding
# size.
embeddings = tf.Variable(tf.random_uniform([voc_size, embedding_size],
                                           -1.0, 1.0),
                         name='embeddings')

# Pull out the rows of the embedding matrix that correspond to the training
# inputs, e.g.:
#    embeddings     inputs    selected
#    [[1, 2, 3]  ->  [2, 3] -> [[2, 3, 4]
#     [2, 3, 4]                 [3, 4, 5]]
#     [3, 4, 5]
#     [4, 5, 6]]
selected_embed = tf.nn.embedding_lookup(embeddings, inputs,
                                        name='selected_embed')

# Variables used by the nce_loss function.
nce_weights = tf.Variable(tf.random_uniform([voc_size, embedding_size],
                                            -1.0, 1.0),
                          name='nce_weights')
nce_biases = tf.Variable(tf.zeros([voc_size]), name='nce_biases')

# Implementing nce_loss by hand would be quite involved, but TensorFlow
# provides it, so we can simply call tf.nn.nce_loss.
loss = tf.reduce_mean(
    tf.nn.nce_loss(nce_weights, nce_biases, labels, selected_embed,
                   num_sampled, voc_size))

train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

#########
# Train the model
######
sess = tf.Session()
init = tf.global_variables_initializer()
sess.run(init)
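# The training loop is not shown in the excerpt. A minimal sketch of what
# usually follows (batch_inputs/batch_labels and the step count are
# placeholders, not from the original):
for step in range(1, 1001):
    _, loss_val = sess.run([train_op, loss],
                           feed_dict={inputs: batch_inputs,
                                      labels: batch_labels})
    if step % 100 == 0:
        print('step {}: loss {:.4f}'.format(step, loss_val))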
                                     h=encoder_final_state_h)

# Define the decoder with minimal use of predefined functions
# (this is going to be longer).
with tf.variable_scope('decoding') as decoding_scope:
    decoder_cell = LSTMCell(decoder_hidden_units)
    encoder_max_time, batch_size = tf.unstack(tf.shape(encoder_inputs))
    decoder_lengths = encoder_inputs_length + 3

    # Output projection: define our weights and biases.
    W = tf.Variable(tf.random_uniform(shape=[decoder_hidden_units, vocab_size],
                                      minval=-1, maxval=1),
                    dtype=tf.float32)
    b = tf.Variable(tf.zeros([vocab_size]), dtype=tf.float32)

    # For padding and EOS.
    eos_time_slice = tf.ones([batch_size], dtype=tf.int32, name='EOS')
    pad_time_slice = tf.zeros([batch_size], dtype=tf.int32, name='PAD')
    eos_step_embedded = tf.nn.embedding_lookup(embeddings, eos_time_slice)
    pad_step_embedded = tf.nn.embedding_lookup(embeddings, pad_time_slice)

    def loop_fn_initial():
        initial_elements_finished = (0 >= decoder_lengths)
        initial_input = eos_step_embedded
        initial_cell_state = encoder_final_state
        initial_cell_output = None
        initial_loop_state = None
def forward(self, x, x_mask=None, context_layer=None, init_state=None):
    assert not (self.reverse_alternation and x_mask is None)
    # assert (context_layer is None or
    #         tf.shape(context_layer)[-1] == self.context_state_size)

    def create_step_fun(gru):
        def step_fn(prev_state, x):
            gates_x2d, proposal_x2d = x[0], x[1]
            new_state = gru.forward(prev_state,
                                    gates_x=gates_x2d,
                                    proposal_x=proposal_x2d)
            if len(x) > 2:
                mask = x[2]
                new_state *= mask  # batch x 1
                # The first couple of states of the reversed encoder should be
                # zero; this is why we multiply by the mask. This way, when
                # the reversed encoder reaches the actual words, the state
                # will be zeros and not some accumulated garbage.
            return new_state
        return step_fn

    if init_state is None:
        init_state = tf.zeros(shape=[self.batch_size, self.state_size],
                              dtype=tf.float32)
    if x_mask is not None:
        x_mask_r = tf.reverse(x_mask, axis=[0])
        x_mask_bwd = tf.expand_dims(x_mask_r, axis=[2])  # seqLen x batch x 1

    for i, gru in enumerate(self.grus):
        layer = RecurrentLayer(initial_state=init_state,
                               step_fn=create_step_fun(gru))
        if context_layer is None:
            x2 = x
        else:
            x2 = tf.concat([x, context_layer], axis=-1)
        if not self.alternating:
            left_to_right = True
        else:
            if self.reverse_alternation:
                left_to_right = (i % 2 == 1)
            else:
                left_to_right = (i % 2 == 0)
        if left_to_right:
            # Recurrent state flows from left to right in this layer.
            gates_x, proposal_x = gru.precompute_from_x(x2)
            h = layer.forward((gates_x, proposal_x))
        else:
            # Recurrent state flows from right to left in this layer.
            x2_reversed = tf.reverse(x2, axis=[0])
            gates_x, proposal_x = gru.precompute_from_x(x2_reversed)
            h_reversed = layer.forward((gates_x, proposal_x, x_mask_bwd))
            h = tf.reverse(h_reversed, axis=[0])
        # Compute the word states, which will become the input for the
        # next layer (or the output of the stack if we're at the top).
        if not self.residual_connections or i < self.first_residual_output:
            x = h
        else:
            x += h  # residual connection
    return x
# Naive training: fit a list of parameters (W & b).
# With these settings, the result converges within about 2500 steps.
import tensorflow as tf
import numpy as np

# Construct the data set.
target = [0.123, 0.234, 0.345, 0.456, 0.567, 0.678, 0.789, 0.898, 0.987,
          10.654]
_x = np.float32(np.random.rand(10, 1000))
_y = np.dot(target, _x) + 0.233

b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform(shape=[1, 10], minval=-100.0, maxval=100.0))
y = tf.matmul(W, _x) + b

# Set the loss.
loss = tf.reduce_mean(tf.square(y - _y))
# Set the optimizer, namely gradient descent.
# If the learning rate is too high, the descent diverges and produces NaNs.
optimizer = tf.train.GradientDescentOptimizer(0.15)
# Set the target of training.
train = optimizer.minimize(loss)

# Init.
init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)

# Training
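# The training loop itself is missing from the excerpt. Given the comment
# above that convergence takes ~2500 steps, a plausible completion (an
# assumption, not the original code):
for step in range(2501):
    sess.run(train)
    if step % 500 == 0:
        print(step, sess.run(loss), sess.run(W), sess.run(b))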
train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

# Ops and variables pinned to the CPU because of missing GPU implementation
with tf.device('/cpu:0'):
    # Look up embeddings for inputs.
    embeddings = tf.Variable(
        tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
    embed = tf.nn.embedding_lookup(embeddings, train_inputs)

    # Construct the variables for the NCE loss
    nce_weights = tf.Variable(
        tf.truncated_normal([vocabulary_size, embedding_size],
                            stddev=1.0 / math.sqrt(embedding_size)))
    nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

# Compute the average NCE loss for the batch.
# tf.nce_loss automatically draws a new sample of the negative labels each
# time we evaluate the loss.
loss = tf.reduce_mean(
    tf.nn.nce_loss(weights=nce_weights,
                   biases=nce_biases,
                   labels=train_labels,
                   inputs=embed,
                   num_sampled=num_sampled,
                   num_classes=vocabulary_size))

# Construct the SGD optimizer using a learning rate of 1.0.
optimizer = tf.train.GradientDescentOptimizer(1.0).minimize(loss)
image = tf.placeholder(tf.float32, [None, 224, 224, 3])
image = tf.div(tf.subtract(image, tf.reduce_mean(image)), reduce_std(image))
labels = tf.placeholder(tf.float32, [None, 54])
net = VGG_CNN_F({'data': image})
fc7 = net.layers['fc7']
print(fc7.get_shape())

w1 = tf.Variable(tf.random_normal(shape=[4096, 100], mean=0,
                                  stddev=2 / np.sqrt(4096)),
                 name='feature_layer')
b1 = tf.Variable(tf.zeros([100]), name='feature_layer_bias')
output = tf.matmul(fc7, w1) + b1

w = tf.Variable(tf.random_normal(shape=[100, 54], mean=0,
                                 stddev=2 / np.sqrt(100)),
                name='last_W')
b = tf.Variable(tf.zeros([54]), name='last_b')
output1 = tf.matmul(output, w) + b

# Note: the loss must be computed on the 54-way logits (output1), not on the
# 100-dimensional feature layer (the original passed `output` here, which
# does not match the [None, 54] labels).
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=output1))
tf.summary.scalar('Cost', cost)

opt = tf.train.AdamOptimizer(learning_rate=0.00001)
train_op = opt.minimize(cost)
merged = tf.summary.merge_all()
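# `reduce_std` is referenced above but not defined in the excerpt; TF 1.x has
# no built-in reduce_std, so a helper is presumably defined elsewhere in the
# file. A sketch, assuming the usual definition as the square root of the
# mean squared deviation:
def reduce_std(x):
    mean = tf.reduce_mean(x)
    return tf.sqrt(tf.reduce_mean(tf.square(x - mean)))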
trainData = mnist.data[:60000].astype(float) / 255.
trainLabels = mnist.target[:60000]
testData = mnist.data[60000:].astype(float) / 255.
testLabels = mnist.target[60000:]

# Create the model
tf.reset_default_graph()  # reset if we are rerunning code to avoid variable re-use

# Input layer
x = tf.placeholder(tf.float32, [None, 784])

# Hidden layer
#W1 = tf.get_variable('W1', [784, 100], initializer=tf.random_normal_initializer())
#W1 = tf.Variable(tf.random_normal([784, 100]))
W1 = tf.Variable(tf.truncated_normal([784, 100], stddev=1.0 / np.sqrt(784)))
b1 = tf.Variable(tf.zeros([100,]))
z1 = tf.matmul(x, W1) + b1
y1 = tf.nn.tanh(z1)

# Output layer
#W2 = tf.Variable(tf.random_normal([100, 10]))
W2 = tf.Variable(tf.truncated_normal([100, 10], stddev=1.0 / np.sqrt(100)))
b2 = tf.Variable(tf.zeros([10,]))
y2 = tf.matmul(y1, W2) + b2

# Define the output
y = y2

# Define loss and optimizer
y_ = tf.placeholder(tf.float32, [None, 10])
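# The loss and optimizer named in the last comment are cut off in the
# excerpt. A minimal, assumed completion in the same style:
cross_entropy = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits(labels=y_, logits=y))
train_step = tf.train.GradientDescentOptimizer(0.5).minimize(cross_entropy)
correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))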
fake_box_predictor = tf.train.Checkpoint(
    _base_tower_layers_for_heads=(
        detection_model._box_predictor._base_tower_layers_for_heads),
    # _prediction_heads=detection_model._box_predictor._prediction_heads,
    #     (i.e., the classification head that we *will not* restore)
    _box_prediction_head=detection_model._box_predictor._box_prediction_head,
)
fake_model = tf.train.Checkpoint(
    _feature_extractor=detection_model._feature_extractor,
    _box_predictor=fake_box_predictor)
ckpt = tf.train.Checkpoint(model=fake_model)
ckpt.restore(checkpoint_path).expect_partial()

# Run the model through a dummy image so that variables are created.
image, shapes = detection_model.preprocess(tf.zeros([1, 640, 640, 3]))
prediction_dict = detection_model.predict(image, shapes)
_ = detection_model.postprocess(prediction_dict, shapes)
print('Weights restored!')

##########################################################################
# tf.keras.backend.set_learning_phase(True)

# These parameters can be tuned; since our training set has 5 images
# it doesn't make sense to have a much larger batch size, though we could
# fit more examples in memory if we wanted to.
batch_size = 4
learning_rate = 0.01
num_batches = 100

# Select variables in top layers to fine-tune.
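# The variable selection itself is cut off in the excerpt. In the eager
# few-shot fine-tuning pattern this snippet resembles, the selection filters
# the model's trainable variables by the prediction-head name prefixes; a
# sketch (the exact prefix strings are an assumption and depend on the model):
trainable_variables = detection_model.trainable_variables
to_fine_tune = []
prefixes_to_train = [
    'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalBoxHead',
    'WeightSharedConvolutionalBoxPredictor/WeightSharedConvolutionalClassHead']
for var in trainable_variables:
    if any([var.name.startswith(prefix) for prefix in prefixes_to_train]):
        to_fine_tune.append(var)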
def sample_sparse_grid_like(gt_flow, target_density=75, height=384, width=512):
    print("sample_sparse_grid_like")
    # Important: matches is already normalised to [0, 1]
    num_samples = tf.multiply(
        tf.multiply(tf.divide(target_density, 100.0), height), width)
    aspect_ratio = tf.divide(width, height)
    # Compute, as in invalid_like for a random box, the number of samples in
    # the horizontal and vertical directions.
    num_samples_w = tf.cast(
        tf.round(tf.sqrt(tf.multiply(num_samples, aspect_ratio))),
        dtype=tf.int32)
    num_samples_h = tf.cast(
        tf.round(tf.divide(tf.cast(num_samples_w, dtype=tf.float32),
                           aspect_ratio)),
        dtype=tf.int32)
    # Check that the crop dimensions are plausible; otherwise crop them to
    # fit (this alters the density we were sampling at).
    num_samples_h = tf.cond(tf.greater(num_samples_h, tf.constant(height)),
                            lambda: tf.constant(height, dtype=tf.int32),
                            lambda: num_samples_h)
    num_samples_w = tf.cond(tf.greater(num_samples_w, tf.constant(width)),
                            lambda: tf.constant(width, dtype=tf.int32),
                            lambda: num_samples_w)

    delta_rows = tf.cast((height - 1 - 0) / num_samples_h, tf.float32)
    sample_points_h = tf.cast(
        tf.round(tf.range(start=0, limit=height, delta=delta_rows,
                          dtype=tf.float32)),
        dtype=tf.int32)
    delta_cols = tf.cast((width - 1 - 0) / num_samples_w, tf.float32)
    sample_points_w = tf.cast(
        tf.round(tf.range(start=0, limit=width, delta=delta_cols,
                          dtype=tf.float32)),
        dtype=tf.int32)

    # Create a meshgrid of all combinations (i.e. coordinates to sample at).
    rows, cols = tf.meshgrid(sample_points_h, sample_points_w, indexing='ij')
    rows_flatten = tf.reshape(rows, [-1])
    cols_flatten = tf.reshape(cols, [-1])
    # Compute absolute indices as row * width + cols.
    indices = tf.add(tf.multiply(rows_flatten, width), cols_flatten)
    ones = tf.ones(tf.shape(indices), dtype=tf.float32)
    zeros = lambda: tf.zeros((height * width), dtype=tf.float32)
    matches = tf.Variable(initial_value=zeros, trainable=False)
    matches = tf.scatter_update(matches, indices, ones)  # all 1D tensors

    # Randomly corrupt part of the mask with a random rectangle
    # (superpixels in the future).
    corrupt_mask = tf.random_uniform([], maxval=2, dtype=tf.int32)
    matches = tf.cond(
        tf.greater(corrupt_mask, tf.constant(0)),
        lambda: corrupt_sparse_flow_once(matches, target_density, height,
                                         width),
        lambda: return_identity_one(matches))
    sampling_mask = tf.reshape(matches, (height, width))  # size (h, w)
    matches = tf.cast(tf.expand_dims(sampling_mask, -1),
                      dtype=tf.float32)  # convert to (h, w, 1)

    # Sample the ground-truth flow with the given mask.
    # (A previous, manual implementation kept for reference:)
    # sampling_mask = sampling_mask[:, :, tf.newaxis]
    # sampling_mask_rep = tf.tile(sampling_mask, [1, 1, 2])
    # sampling_mask_flatten = tf.reshape(sampling_mask_rep, [-1])
    # sampling_mask_flatten_where = tf.where(
    #     tf.equal(sampling_mask_flatten,
    #              tf.cast(1, dtype=sampling_mask_flatten.dtype)))
    # sampling_mask_flatten_where = tf.reshape(sampling_mask_flatten_where, [-1])
    # gt_flow_sampling_mask = tf.boolean_mask(gt_flow, sampling_mask_rep)
    # zeros = lambda: tf.zeros(tf.reduce_prod(gt_flow.shape), dtype=tf.float32)
    # sparse_flow = tf.Variable(initial_value=zeros, dtype=tf.float32,
    #                           trainable=False)
    # sparse_flow = tf.scatter_update(sparse_flow, sampling_mask_flatten_where,
    #                                 gt_flow_sampling_mask)
    # sparse_flow = tf.reshape(sparse_flow, gt_flow.shape)
    sparse_flow = mask_to_sparse_flow(sampling_mask, gt_flow)
    return matches, sparse_flow