def sample_from_discretized_mix_logistic(l, nr_mix):
    """Sample a 3-channel tensor from mixture-of-logistics parameters.

    Args:
        l: parameter tensor, sliced here as 4-D. Along the last axis the
            first `nr_mix` entries are the mixture logits; the remainder is
            reshaped to `[..., 3, nr_mix * 3]` and split into means,
            log-scales and (pre-tanh) coefficients.
        nr_mix: number of mixture components.

    Returns:
        The three sampled channels, each clipped to [-1, 1], concatenated
        along axis 3.
    """
    out_shape = int_shape(l)[:-1] + [3]
    lead_shape = out_shape[:-1]

    def clip_unit(t):
        # Clamp to the interval [-1, 1].
        return tf.minimum(tf.maximum(t, -1.), 1.)

    def as_channel(t):
        # Give a sampled channel a trailing axis of size 1 for concatenation.
        return tf.reshape(t, lead_shape + [1])

    # Unpack parameters.
    logit_probs = l[:, :, :, :nr_mix]
    params = tf.reshape(l[:, :, :, nr_mix:], out_shape + [nr_mix * 3])

    # Sample the mixture indicator: argmax over logits perturbed with
    # -log(-log(u)) noise, turned into a one-hot selector.
    noise = tf.random_uniform(
        logit_probs.get_shape(), minval=1e-5, maxval=1. - 1e-5)
    noisy_logits = logit_probs - tf.log(-tf.log(noise))
    sel = tf.one_hot(tf.argmax(noisy_logits, 3), depth=nr_mix,
                     dtype=tf.float32)
    sel = tf.reshape(sel, lead_shape + [1, nr_mix])

    # Select the logistic parameters of the chosen component.
    means = tf.reduce_sum(params[:, :, :, :, :nr_mix] * sel, 4)
    picked_log_scales = tf.reduce_sum(
        params[:, :, :, :, nr_mix:2 * nr_mix] * sel, 4)
    log_scales = tf.maximum(picked_log_scales, -7.)
    coeffs = tf.reduce_sum(
        tf.nn.tanh(params[:, :, :, :, 2 * nr_mix:3 * nr_mix]) * sel, 4)

    # Sample from the logistic and clip to the interval.
    # We don't actually round to the nearest 8-bit value when sampling.
    u = tf.random_uniform(means.get_shape(), minval=1e-5, maxval=1. - 1e-5)
    x = means + tf.exp(log_scales) * (tf.log(u) - tf.log(1. - u))
    x0 = clip_unit(x[:, :, :, 0])
    x1 = clip_unit(x[:, :, :, 1] + coeffs[:, :, :, 0] * x0)
    x2 = clip_unit(
        x[:, :, :, 2] + coeffs[:, :, :, 1] * x0 + coeffs[:, :, :, 2] * x1)
    return tf.concat([as_channel(x0), as_channel(x1), as_channel(x2)], 3)
def __init__(self, units, activation=None, mean=None, std=None, eur=False,
             no_bias=False):
    """Create the weight variable (and optional bias) of a dense layer.

    Args:
        units: pair `[n_in, n_out]`; also used as the weight shape.
        activation: activation name; 'sigmoid' is used when None.
        mean: mean of the truncated-normal initializer (default 0.0).
        std: stddev of the truncated-normal initializer
            (default `1 / sqrt(n_in)`).
        eur: when true, 'sigmoid', 'tanh' and the relu-family activations
            get a uniform initializer whose range is derived from
            `sqrt(6 / (n_in + n_out))`; any other activation falls back to
            the truncated normal.
        no_bias: when true, no bias variable is created and `self.b` is None.
    """
    assert not(units is None), "You need to provide the number of units ([n_in,n_out])"
    if mean is None:
        mean = 0.0
    if std is None:
        std = 1.0 / (float(units[0]) ** 0.5)
    self.n_in, self.n_out = units
    self.no_bias = no_bias
    self.activation = 'sigmoid' if activation is None else activation

    relu_family = ("leaky_relu6", 'relu', 'relu6', "leaky_relu")
    # (minval, maxval) for the uniform initializer; None selects the
    # truncated normal instead.
    uniform_range = None
    if eur:
        if self.activation == 'sigmoid':
            bound = 4 * (6.0 / (self.n_in + self.n_out)) ** 0.5
            uniform_range = (-bound, bound)
        elif self.activation in relu_family:
            uniform_range = (0, (6.0 / (self.n_in + self.n_out)) ** 0.5)
        elif self.activation == 'tanh':
            bound = (6.0 / (self.n_in + self.n_out)) ** 0.5
            uniform_range = (-bound, bound)

    if uniform_range is None:
        initial_w = tf.truncated_normal(units, mean=mean, stddev=std)
    else:
        low, high = uniform_range
        initial_w = tf.random_uniform(units, minval=low, maxval=high)
    self.W = tf.Variable(initial_w, name="W")

    self.b = None if no_bias else tf.Variable(tf.zeros([units[1]]), name="b")
def testCustomGrad(self):
    """Gradients of a `fn_with_custom_grad` function must come from `grad_fn`."""

    def fn(a, b, c):
        dense_out = tf.layers.dense(a, 10, use_bias=False)
        return dense_out + tf.matmul(b, c)

    def grad_fn(inputs, variables, unused_outputs, unused_grad_outputs):
        # Inputs first, then variables: the k-th tensor (0-based) gets a
        # constant gradient of k + 1.
        grad_inputs = [
            tf.ones_like(tensor) * (idx + 1.)
            for idx, tensor in enumerate(inputs)
        ]
        grad_vars = [
            tf.ones_like(tensor) * (idx + len(inputs) + 1.)
            for idx, tensor in enumerate(variables)
        ]
        return grad_inputs, grad_vars

    a = tf.random_uniform([11, 6])
    b = tf.random_uniform([11, 7])
    c = tf.random_uniform([7, 10])
    # Stand-in with the shape of the dense kernel; only used to build the
    # expected gradient.
    w = tf.random_uniform([6, 10])

    wrapped_fn = common_layers.fn_with_custom_grad(grad_fn)(fn)
    out = wrapped_fn(a, b, c)
    loss = tf.reduce_mean(out)
    kernel = tf.trainable_variables()[0]
    grads = tf.gradients(loss, [a, b, c, kernel])
    expected_grads = [
        tf.ones_like(tensor) * (idx + 1.)
        for idx, tensor in enumerate([a, b, c, w])
    ]

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        grad_vals, expected_vals = sess.run([grads, expected_grads])
        for actual, expected in zip(grad_vals, expected_vals):
            self.assertAllClose(actual, expected)
def __init__(self, args):
    """Build the sample tensor `self.x` for the configured 2-D distribution.

    Args:
        args: configuration object providing `device`, `batch_size` and
            `distribution`, the latter one of 'circle', 'modes',
            'modal-gaussian', 'sin' or 'static-point'.

    Raises:
        ValueError: if `args.distribution` is not one of the names above.
    """
    with tf.device(args.device):
        def circle(x):
            # Divide every row by its Euclidean norm.
            norm = tf.sqrt(tf.reduce_sum(tf.square(x), 1))
            return x / tf.reshape(norm, [int(norm.get_shape()[0]), 1])

        def modes(x):
            # Snap every coordinate to the nearest multiple of 0.5.
            return tf.round(x * 2) / 2.0

        if args.distribution == 'circle':
            x = tf.random_normal([args.batch_size, 2])
            x = circle(x)
        elif args.distribution == 'modes':
            x = tf.random_uniform([args.batch_size, 2], -1, 1)
            x = modes(x)
        elif args.distribution == 'modal-gaussian':
            x = tf.random_uniform([args.batch_size, 2], -1, 1)
            y = tf.random_normal([args.batch_size, 2], stddev=0.04, mean=0.15)
            x = tf.round(x) + y
        elif args.distribution == 'sin':
            x = tf.random_uniform((1, args.batch_size), -10.5, 10.5)
            x = tf.transpose(x)
            r_data = tf.random_normal((args.batch_size, 1), mean=0, stddev=0.1)
            xy = tf.sin(0.75 * x) * 7.0 + x * 0.5 + r_data * 1.0
            x = tf.concat([xy, x], 1) / 16.0
        elif args.distribution == 'static-point':
            x = tf.ones([args.batch_size, 2])
        else:
            # Previously this fell through and failed later with an
            # UnboundLocalError on `x`; fail early with a clear message.
            raise ValueError(
                "Unsupported distribution: {!r}".format(args.distribution))

        self.x = x
        self.xy = tf.zeros_like(self.x)
def __init__(self, dim_image, n_words, dim_hidden, batch_size, n_lstm_steps,
             drop_out_rate, bias_init_vector=None):
    """Store the hyper-parameters and create the model's cells and variables.

    Args:
        dim_image: size of the image feature vector.
        n_words: vocabulary size.
        dim_hidden: hidden size shared by the LSTMs and the embeddings.
        batch_size: stored on the instance.
        n_lstm_steps: stored on the instance.
        drop_out_rate: the dropout wrappers keep outputs with probability
            `1 - drop_out_rate`.
        bias_init_vector: optional numpy array used as the initial value of
            `embed_word_b`; zeros are used when None.
    """
    self.dim_image = dim_image
    self.n_words = n_words
    self.dim_hidden = dim_hidden
    self.batch_size = batch_size
    self.n_lstm_steps = n_lstm_steps
    self.drop_out_rate = drop_out_rate

    def small_uniform(shape):
        # Every weight matrix starts uniformly in [-0.1, 0.1].
        return tf.random_uniform(shape, -0.1, 0.1)

    with tf.device("/gpu:2"):
        self.Wemb = tf.Variable(small_uniform([n_words, dim_hidden]), name='Wemb')

    # Two peephole LSTMs, each wrapped with output dropout.
    self.lstm1 = rnn_cell.LSTMCell(self.dim_hidden, self.dim_hidden, use_peepholes=True)
    self.lstm1_dropout = rnn_cell.DropoutWrapper(
        self.lstm1, output_keep_prob=1 - self.drop_out_rate)
    self.lstm2 = rnn_cell.LSTMCell(self.dim_hidden, self.dim_hidden, use_peepholes=True)
    self.lstm2_dropout = rnn_cell.DropoutWrapper(
        self.lstm2, output_keep_prob=1 - self.drop_out_rate)

    # W is weight, b is bias.
    self.encode_image_W = tf.Variable(
        small_uniform([dim_image, dim_hidden]), name='encode_image_W')
    self.encode_image_b = tf.Variable(tf.zeros([dim_hidden]), name='encode_image_b')

    self.embed_word_W = tf.Variable(
        small_uniform([dim_hidden, n_words]), name='embed_word_W')
    if bias_init_vector is None:
        word_bias_init = tf.zeros([n_words])
    else:
        word_bias_init = bias_init_vector.astype(np.float32)
    self.embed_word_b = tf.Variable(word_bias_init, name='embed_word_b')
def _generate_synthetic_snli_data_batch(sequence_length, batch_size, vocab_size):
    """Generate a fake batch of SNLI data for testing.

    Args:
        sequence_length: number of rows of the random premise and hypothesis
            token tensors.
        batch_size: number of examples (columns) in the batch.
        vocab_size: exclusive upper bound of the random token ids.

    Returns:
        Tuple `(labels, prem, prem_trans, hypo, hypo_trans)`, moved to the
        GPU when `tfe.num_gpus()` is non-zero.
    """
    # Fixed transition sequence shared by premise and hypothesis; it is
    # repeated once per batch element and transposed so that the batch is
    # the second axis.
    transitions = [3, 3, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3, 2, 3, 3, 2, 2, 3, 3,
                   3, 2, 2, 2, 2, 3, 2, 2]

    def transition_batch():
        return tf.constant(
            np.array([transitions] * batch_size, dtype=np.int64).T)

    token_shape = (sequence_length, batch_size)
    with tf.device("cpu:0"):
        labels = tf.random_uniform([batch_size], minval=1, maxval=4, dtype=tf.int64)
        prem = tf.random_uniform(token_shape, maxval=vocab_size, dtype=tf.int64)
        prem_trans = transition_batch()
        hypo = tf.random_uniform(token_shape, maxval=vocab_size, dtype=tf.int64)
        hypo_trans = transition_batch()

    batch = (labels, prem, prem_trans, hypo, hypo_trans)
    if tfe.num_gpus():
        batch = tuple(tensor.gpu() for tensor in batch)
    return batch
def testDiscretizedMixLogisticLoss(self):
    """Compare `discretized_mix_logistic_loss` with a hand-computed loss."""
    batch, height, width = 2, 4, 4
    channels = 3
    num_mixtures = 5
    spatial = [batch, height, width]
    param_shape = spatial + [num_mixtures * 3]

    # Assign all probability mass to the first component.
    first_logit = tf.ones(spatial + [1]) * 1e8
    other_logits = tf.zeros(spatial + [num_mixtures - 1])
    logits = tf.concat([first_logit, other_logits], axis=-1)
    locs = tf.random_uniform(param_shape, minval=-.9, maxval=.9)
    log_scales = tf.random_uniform(param_shape, minval=-1., maxval=1.)
    coeffs = tf.atanh(tf.zeros(param_shape))
    pred = tf.concat([logits, locs, log_scales, coeffs], axis=-1)

    # Test labels that don't satisfy edge cases where 8-bit value is 0 or 255.
    labels = tf.random_uniform(spatial + [channels], minval=-.9, maxval=.9)

    # Expected loss, computed from the first three loc / log-scale entries:
    # -sum(log(sigmoid(upper) - sigmoid(lower))) over the last axis.
    first_locs = locs[..., :3]
    first_log_scales = log_scales[..., :3]
    centered = labels - first_locs
    inv_stdv = tf.exp(-first_log_scales)
    upper = inv_stdv * (centered + 1. / 255.)
    lower = inv_stdv * (centered - 1. / 255.)
    bin_mass = tf.nn.sigmoid(upper) - tf.nn.sigmoid(lower)
    expected_loss = -tf.reduce_sum(tf.log(bin_mass), axis=-1)

    actual_loss = common_layers.discretized_mix_logistic_loss(
        pred=pred, labels=labels)
    actual_val, expected_val = self.evaluate([actual_loss, expected_loss])
    self.assertAllClose(actual_val, expected_val, rtol=1e-5)
def _random_crop_and_flip(image, crop_height, crop_width):
    """Take a random crop of the image and randomly flip it left-right.

    Args:
        image: a 3-D image tensor.
        crop_height: height of the crop.
        crop_width: width of the crop.

    Returns:
        3-D tensor holding the cropped (and possibly flipped) image.
    """
    height, width = _get_h_w(image)

    # The offsets come from tf.random_uniform rather than numpy so that they
    # are drawn each time the graph is evaluated, not once when the graph is
    # defined.
    max_top = height - crop_height
    top = tf.random_uniform([], maxval=max_top + 1, dtype=tf.int32)
    max_left = width - crop_width
    left = tf.random_uniform([], maxval=max_left + 1, dtype=tf.int32)

    begin = [top, left, 0]
    size = [crop_height, crop_width, -1]  # -1 keeps every channel
    return tf.image.random_flip_left_right(tf.slice(image, begin, size))
def test_get_expected_feature_map_shapes_with_inception_v3(self):
    """Check the output shapes of the Inception V3 multi-resolution maps."""
    input_specs = (
        ('Mixed_5d', [4, 35, 35, 256]),
        ('Mixed_6e', [4, 17, 17, 576]),
        ('Mixed_7c', [4, 8, 8, 1024]),
    )
    image_features = {
        layer: tf.random_uniform(shape, dtype=tf.float32)
        for layer, shape in input_specs
    }
    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=INCEPTION_V3_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    # The three inputs keep their shapes; three further maps are appended.
    expected_feature_map_shapes = {
        'Mixed_5d': (4, 35, 35, 256),
        'Mixed_6e': (4, 17, 17, 576),
        'Mixed_7c': (4, 8, 8, 1024),
        'Mixed_7c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_7c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_7c_2_Conv2d_5_3x3_s2_128': (4, 1, 1, 128)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(init_op)
        out_feature_maps = sess.run(feature_maps)
        out_feature_map_shapes = {
            name: fmap.shape for name, fmap in out_feature_maps.items()}
        self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def test_get_expected_feature_map_shapes_with_inception_v2(self, use_keras):
    """Check the output shapes of the Inception V2 feature-map generator."""
    input_specs = (
        ('Mixed_3c', [4, 28, 28, 256]),
        ('Mixed_4c', [4, 14, 14, 576]),
        ('Mixed_5c', [4, 7, 7, 1024]),
    )
    image_features = {
        layer: tf.random_uniform(shape, dtype=tf.float32)
        for layer, shape in input_specs
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=INCEPTION_V2_LAYOUT,
        use_keras=use_keras)
    feature_maps = feature_map_generator(image_features)

    # The three inputs keep their shapes; three further maps are appended.
    expected_feature_map_shapes = {
        'Mixed_3c': (4, 28, 28, 256),
        'Mixed_4c': (4, 14, 14, 576),
        'Mixed_5c': (4, 7, 7, 1024),
        'Mixed_5c_2_Conv2d_3_3x3_s2_512': (4, 4, 4, 512),
        'Mixed_5c_2_Conv2d_4_3x3_s2_256': (4, 2, 2, 256),
        'Mixed_5c_2_Conv2d_5_3x3_s2_256': (4, 1, 1, 256)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(init_op)
        out_feature_maps = sess.run(feature_maps)
        out_feature_map_shapes = {
            name: fmap.shape for name, fmap in out_feature_maps.items()}
        self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def denseNet(self, hidden=20, depth=3, act=tf.nn.tanh, dropout=True, norm=None):
    """Append `depth` densely connected layers of width `hidden`.

    Every layer receives the concatenation of the original input and the
    outputs of all previous layers of this stack, so the input width grows
    by `hidden` per layer.

    Args:
        hidden: width of each layer.
        depth: number of layers to add; a warning is printed when below 3.
        act: activation applied to each layer's output, skipped when falsy.
        dropout: when truthy, apply dropout with `self.keep_prob`.
        norm: when truthy, apply `self.norm(..., lsize=1)` after activation.
    """
    if hidden > 100:
        print("WARNING: denseNet uses quadratic mem for " + str(hidden))
    if depth < 3:
        print("WARNING: did you mean to use Fully connected layer 'dense'? Expecting depth>3 vs " + str(depth))

    inputs = self.last_layer
    inputs_width = self.last_width
    width = hidden
    # Weights and biases both start uniformly in [-1/width, 1/width].
    low, high = -1. / width, 1. / width

    while depth > 0:
        with tf.name_scope('DenNet_{:d}'.format(width)):
            print("dense width ", inputs_width, "x", width)
            layer_no = str(len(self.layers))
            weights = tf.Variable(
                tf.random_uniform([inputs_width, width], minval=low, maxval=high),
                name="weights")
            bias = tf.Variable(
                tf.random_uniform([width], minval=low, maxval=high),
                name="bias")
            dense1 = tf.matmul(inputs, weights, name='dense_' + layer_no) + bias

            tf.summary.histogram('dense_' + layer_no, dense1)
            tf.summary.histogram('dense_' + layer_no + '/sparsity',
                                 tf.nn.zero_fraction(dense1))
            tf.summary.histogram('weights_' + layer_no, weights)
            tf.summary.histogram('weights_' + layer_no + '/sparsity',
                                 tf.nn.zero_fraction(weights))
            tf.summary.histogram('bias_' + layer_no, bias)

            if act:
                dense1 = act(dense1)
            if norm:
                dense1 = self.norm(dense1, lsize=1)  # SHAPE!
            if dropout:
                dense1 = tf.nn.dropout(dense1, self.keep_prob)

            self.add(dense1)
            self.last_width = width
            # Feed everything seen so far into the next layer.
            inputs = tf.concat(1, [inputs, dense1])
            inputs_width += width
            depth -= 1

    self.last_width = width
def random_batch(batch_size, config):
    """Create a random batch of images and integer class labels.

    Args:
        batch_size: number of examples in the batch.
        config: object providing `input_shape` (a tuple, appended to the
            batch dimension) and `n_classes` (exclusive upper label bound).

    Returns:
        `(images, labels)`: uniform random images of shape
        `(batch_size,) + config.input_shape` and int32 labels in
        `[0, config.n_classes)`.
    """
    images = tf.random_uniform((batch_size,) + config.input_shape)
    labels = tf.random_uniform(
        [batch_size], minval=0, maxval=config.n_classes, dtype=tf.int32)
    return images, labels
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
        self, use_keras):
    """Check the output shapes for the embedded SSD MobileNet V1 layout."""
    input_specs = (
        ('Conv2d_11_pointwise', [4, 16, 16, 512]),
        ('Conv2d_13_pointwise', [4, 8, 8, 1024]),
    )
    image_features = {
        layer: tf.random_uniform(shape, dtype=tf.float32)
        for layer, shape in input_specs
    }
    feature_map_generator = self._build_feature_map_generator(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        use_keras=use_keras)
    feature_maps = feature_map_generator(image_features)

    # The two inputs keep their shapes; three further maps are appended.
    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
        'Conv2d_13_pointwise': (4, 8, 8, 1024),
        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(init_op)
        out_feature_maps = sess.run(feature_maps)
        out_feature_map_shapes = {
            name: fmap.shape for name, fmap in out_feature_maps.items()}
        self.assertDictEqual(expected_feature_map_shapes, out_feature_map_shapes)
def _setup_variables(self):
    """Create the weight and bias variables of every layer.

    For layer k the weight has shape `(self._shape[k], self._shape[k + 1])`
    and is drawn uniformly from `[-b, b]` with
    `b = 4 * sqrt(6 / (fan_in + fan_out))`; the bias starts at zero. The
    variables are stored on `self` under the names produced by
    `self._weights_str` / `self._bias_str`, with an "_out" suffix for the
    output layer.
    """
    def add_layer(name_w, name_b, w_shape):
        # The bound is derived from this layer's own shape. The output
        # layer used to reuse the bound left over from the last hidden
        # layer, which was the wrong range and a NameError when there are
        # no hidden layers.
        initializer_bound = 4.0 * tf.sqrt(6.0 / (w_shape[0] + w_shape[1]))
        w_init = tf.random_uniform(w_shape,
                                   -1 * initializer_bound,
                                   1 * initializer_bound)
        self[name_w] = tf.Variable(w_init, name=name_w, trainable=True)

        b_shape = (w_shape[1],)
        self[name_b] = tf.Variable(tf.zeros(b_shape), name=name_b,
                                   trainable=True)
        print(w_shape, b_shape)

    with tf.name_scope("autoencoder_variables"):
        # Hidden layers.
        for i in range(self._num_hidden_layers):
            add_layer(self._weights_str.format(i + 1),
                      self._bias_str.format(i + 1),
                      (self._shape[i], self._shape[i + 1]))

        # Output layer: it has its own weight matrix and bias, both stored
        # with an "_out" suffix.
        last = self._num_hidden_layers
        add_layer(self._weights_str.format(last + 1) + "_out",
                  self._bias_str.format(last + 1) + "_out",
                  (self._shape[last], self._shape[last + 1]))

        print(self._variables.keys())
def __init__(self, config):
    """Build the GRU sequence-classifier graph: inputs, loss and summaries.

    Args:
        config: object providing `batch_size`, `max_seq_len`, `output_dim`,
            `hidden_state_dim`, `vocab_size` and `embedding_dim`.
    """
    self.config = config
    batch = self.config.batch_size

    # Token ids and integer class labels, plus the one-hot form of the labels.
    self.input = tf.placeholder('int32', [batch, config.max_seq_len], name='input')
    self.labels = tf.placeholder('int64', [batch], name='labels')
    self.labels_one_hot = tf.one_hot(
        indices=self.labels, depth=config.output_dim,
        on_value=1.0, off_value=0.0, axis=-1)

    self.gru = GRUCell(config.hidden_state_dim)

    # Word embeddings, initialised uniformly in [-1, 1].
    word_table_init = tf.random_uniform(
        [config.vocab_size, config.embedding_dim], -1.0, 1.0)
    word_table = tf.get_variable('word_embeddings', initializer=word_table_init)
    embedded = tf.nn.embedding_lookup(word_table, self.input)
    self.emb = embedded

    # Split along the sequence axis into one tensor per step and run the GRU.
    steps = tf.split(1, config.max_seq_len, embedded)
    inputs = [tf.squeeze(step, squeeze_dims=[1]) for step in steps]
    _, final_state = tf.nn.rnn(cell=self.gru, inputs=inputs, dtype=tf.float32)

    # Project the final state onto the label space.
    projection_init = tf.random_uniform(
        [config.hidden_state_dim, config.output_dim], -1.0, 1.0)
    projection = tf.get_variable('project2labels', initializer=projection_init)
    logits = tf.matmul(final_state, projection)
    self.logits = logits
    tf.histogram_summary('logits', logits)

    self.probabilities = tf.nn.softmax(logits)
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
        logits, self.labels_one_hot)
    self.loss = tf.reduce_mean(cross_entropy)
    self.predict = tf.nn.softmax(logits)

    # TensorBoard
    is_correct = tf.equal(tf.argmax(self.predict, 1), self.labels)
    self.accuracy = tf.reduce_mean(tf.cast(is_correct, 'float32'), name='accuracy')
    tf.scalar_summary('CCE loss', self.loss)
    tf.scalar_summary('Accuracy', self.accuracy)
    self.tb_info = tf.merge_all_summaries()
def __init__(self, rnn_size, rnn_layer, batch_size, input_embedding_size,
             dim_image, dim_hidden, max_words_q, vocabulary_size,
             drop_out_rate):
    """Store the hyper-parameters and create the network's cells and weights.

    Args:
        rnn_size: number of units of the LSTM cell.
        rnn_layer: number of stacked LSTM layers.
        batch_size: stored on the instance.
        input_embedding_size: width of the question word embedding.
        dim_image: size of the image feature vector.
        dim_hidden: size of the shared multimodal space.
        max_words_q: stored on the instance.
        vocabulary_size: number of rows of the question embedding.
        drop_out_rate: the dropout wrapper keeps outputs with probability
            `1 - drop_out_rate`.

    NOTE(review): `num_output` is not a parameter; it is presumably a
    module-level constant - confirm it is defined before instantiation.
    """
    self.rnn_size = rnn_size
    self.rnn_layer = rnn_layer
    self.batch_size = batch_size
    self.input_embedding_size = input_embedding_size
    self.dim_image = dim_image
    self.dim_hidden = dim_hidden
    self.max_words_q = max_words_q
    self.vocabulary_size = vocabulary_size
    self.drop_out_rate = drop_out_rate

    def uniform_variable(shape, name):
        # Every weight matrix starts uniformly in [-0.08, 0.08].
        return tf.Variable(tf.random_uniform(shape, -0.08, 0.08), name=name)

    # Network definitions
    # question-embedding
    self.embed_ques_W = uniform_variable(
        [self.vocabulary_size, self.input_embedding_size], 'embed_ques_W')

    # encoder: RNN body
    self.lstm = rnn_cell.BasicLSTMCell(rnn_size)
    self.lstm_dropout = rnn_cell.DropoutWrapper(
        self.lstm, output_keep_prob=1 - self.drop_out_rate)
    self.stacked_lstm = rnn_cell.MultiRNNCell(
        [self.lstm_dropout] * self.rnn_layer)

    # MULTIMODAL
    # state-embedding
    self.embed_state_W = uniform_variable(
        [2 * rnn_size * rnn_layer, self.dim_hidden], 'embed_state_W')
    # image-embedding
    self.embed_image_W = uniform_variable(
        [dim_image, self.dim_hidden], 'embed_image_W')
    # score-embedding
    self.embed_scor_W = uniform_variable(
        [dim_hidden, num_output], 'embed_scor_W')
def input_fn(params):
    """Generated input_fn for the given epoch.

    Builds one synthetic batch of random user/item ids and repeats it
    `SYNTHETIC_BATCHES_PER_EPOCH` times. `is_training` is read from the
    enclosing scope.

    Args:
        params: dict with "batch_size", "eval_batch_size", "num_users" and
            "num_items".

    Returns:
        A `tf.data.Dataset`. When training, each element is
        `(features, labels)`; otherwise it is a features dict that also
        carries a random boolean duplicate mask.
    """
    if is_training:
        batch_size = params["batch_size"]
    else:
        # Fall back to the training batch size when no eval size is set.
        batch_size = params["eval_batch_size"] or params["batch_size"]
    num_users = params["num_users"]
    num_items = params["num_items"]

    def random_ints(upper):
        # int32 vector of length batch_size with values in [0, upper).
        return tf.random_uniform([batch_size], dtype=tf.int32,
                                 minval=0, maxval=upper)

    users = random_ints(num_users)
    items = random_ints(num_items)
    features = {
        movielens.USER_COLUMN: users,
        movielens.ITEM_COLUMN: items,
    }

    if is_training:
        labels = random_ints(2)
        data = features, labels
    else:
        features[rconst.DUPLICATE_MASK] = tf.cast(random_ints(2), tf.bool)
        data = features

    dataset = tf.data.Dataset.from_tensors(data).repeat(
        SYNTHETIC_BATCHES_PER_EPOCH)
    return dataset.prefetch(32)
def dae(x, hparams, name):
    """Discretize `x` into (soft) one-hot vectors over `hparams.v_size` classes.

    Args:
        x: input tensor, projected to `hparams.v_size` logits by a dense layer.
        hparams: provides `v_size`, `softmax_k`, `kl_warmup_steps` and `mode`.
        name: variable scope name.

    Returns:
        Tuple `(m, ret, loss)`. With `softmax_k > 0` both `m` and `ret` are
        the top-k softmax output. Otherwise `m` is the softmax of the
        logits, `ret` is the Gumbel-softmax sample during training or the
        argmax one-hot vectors in any other mode, and `loss` is an extra
        scalar loss term.
    """
    with tf.variable_scope(name):
        m = tf.layers.dense(x, hparams.v_size, name="mask")
        if hparams.softmax_k > 0:
            m, kl = top_k_softmax(m, hparams.softmax_k)
            return m, m, 1.0 - tf.reduce_mean(kl)

        logsm = tf.nn.log_softmax(m)

        # Gumbel-softmax sample.
        gumbel_samples = gumbel_sample(common_layers.shape_list(m))
        steps = hparams.kl_warmup_steps
        noise_scale = common_layers.inverse_exp_decay(steps // 5) * 0.5
        gumbel_samples = gumbel_samples * noise_scale
        scheduled_temperature = 1.2 - common_layers.inverse_lin_decay(steps)
        # 10% of the time keep reasonably high temperature to keep learning.
        use_schedule = tf.less(tf.random_uniform([]), 0.9)
        temperature = tf.cond(
            use_schedule,
            lambda: scheduled_temperature,
            lambda: tf.random_uniform([], minval=0.5, maxval=1.0))
        s = tf.nn.softmax((logsm + gumbel_samples) / temperature)

        m = tf.nn.softmax(m)
        kl = - tf.reduce_max(logsm, axis=-1)
        if _DO_SUMMARIES:
            tf.summary.histogram("max-log", tf.reshape(kl, [-1]))

        # Calculate the argmax and construct hot vectors.
        maxvec = tf.reshape(tf.argmax(m, axis=-1), [-1])
        maxvhot = tf.stop_gradient(tf.one_hot(maxvec, hparams.v_size))

        # Add losses that prevent too few being used.
        distrib = tf.reshape(logsm, [-1, hparams.v_size]) * maxvhot
        d_mean = tf.reduce_mean(distrib, axis=[0], keep_dims=True)
        d_variance = tf.reduce_mean(tf.square(distrib - d_mean), axis=[0])
        d_dev = - tf.reduce_mean(d_variance)

        if hparams.mode != tf.contrib.learn.ModeKeys.TRAIN:
            # Just hot @eval.
            ret = tf.reshape(maxvhot, common_layers.shape_list(s))
        else:
            ret = s
        return m, ret, d_dev * 5.0 + tf.reduce_mean(kl) * 0.002
def init_var_map(init_vars, init_path=None):
    """Create the `tf.Variable`s described by `init_vars`.

    Args:
        init_vars: iterable of `(var_name, var_shape, init_method, dtype)`
            tuples. `init_method` is one of 'zero', 'one', 'normal',
            'tnormal', 'uniform', 'xavier', a number (constant fill), or a
            key of the pickled variable map loaded from `init_path`.
        init_path: optional path of a pickled dict of preloaded values.

    Returns:
        Dict mapping each variable name to the created variable. Entries
        with an unknown init method, or whose preloaded value has the wrong
        shape, are reported with a printed 'BadParam' message and omitted.
    """
    # Always bind the map. Without it, a method name that is not built in
    # raised NameError instead of being reported as BadParam whenever no
    # init_path was given.
    load_var_map = {}
    if init_path is not None:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted source.
        with open(init_path, 'rb') as f:
            load_var_map = pkl.load(f)
        print('load variable map from', init_path, load_var_map.keys())

    var_map = {}
    for var_name, var_shape, init_method, dtype in init_vars:
        if init_method == 'zero':
            var_map[var_name] = tf.Variable(tf.zeros(var_shape, dtype=dtype), name=var_name, dtype=dtype)
        elif init_method == 'one':
            var_map[var_name] = tf.Variable(tf.ones(var_shape, dtype=dtype), name=var_name, dtype=dtype)
        elif init_method == 'normal':
            var_map[var_name] = tf.Variable(
                tf.random_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'tnormal':
            var_map[var_name] = tf.Variable(
                tf.truncated_normal(var_shape, mean=0.0, stddev=STDDEV, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'uniform':
            var_map[var_name] = tf.Variable(
                tf.random_uniform(var_shape, minval=MINVAL, maxval=MAXVAL, dtype=dtype),
                name=var_name, dtype=dtype)
        elif init_method == 'xavier':
            # Uniform in [-b, b] with b = sqrt(6 / sum of the dimensions).
            maxval = np.sqrt(6. / np.sum(var_shape))
            minval = -maxval
            var_map[var_name] = tf.Variable(
                tf.random_uniform(var_shape, minval=minval, maxval=maxval, dtype=dtype),
                name=var_name, dtype=dtype)
        elif isinstance(init_method, (int, float)):
            # A number means "fill with this constant".
            var_map[var_name] = tf.Variable(
                tf.ones(var_shape, dtype=dtype) * init_method,
                name=var_name, dtype=dtype)
        elif init_method in load_var_map:
            if load_var_map[init_method].shape == tuple(var_shape):
                var_map[var_name] = tf.Variable(load_var_map[init_method], name=var_name, dtype=dtype)
            else:
                print('BadParam: init method', init_method, 'shape', var_shape, load_var_map[init_method].shape)
        else:
            print('BadParam: init method', init_method)
    return var_map
def __init__(self, dim_image, dim_embed, dim_hidden, batch_size, n_lstm_steps,
             n_words, bias_init_vector=None):
    """Store the sizes as ints and create the caption model's variables.

    Args:
        dim_image: size of the image feature vector.
        dim_embed: width of the word embedding.
        dim_hidden: number of LSTM units.
        batch_size: stored on the instance.
        n_lstm_steps: stored on the instance.
        n_words: vocabulary size.
        bias_init_vector: optional numpy array used as the initial value of
            `embed_word_b`; `self.init_bias` is used when None.
    """
    # Builtin int replaces `np.int`, which was only an alias for it and was
    # removed in NumPy 1.24.
    self.dim_image = int(dim_image)
    self.dim_embed = int(dim_embed)
    self.dim_hidden = int(dim_hidden)
    self.batch_size = int(batch_size)
    self.n_lstm_steps = int(n_lstm_steps)
    self.n_words = int(n_words)

    with tf.device("/cpu:0"):
        self.Wemb = tf.Variable(tf.random_uniform([n_words, dim_embed], -0.1, 0.1), name='Wemb')

    self.bemb = self.init_bias(dim_embed, name='bemb')

    self.lstm = rnn_cell.BasicLSTMCell(dim_hidden)

    self.encode_img_W = tf.Variable(tf.random_uniform([dim_image, dim_hidden], -0.1, 0.1), name='encode_img_W')
    self.encode_img_b = self.init_bias(dim_hidden, name='encode_img_b')

    self.embed_word_W = tf.Variable(tf.random_uniform([dim_hidden, n_words], -0.1, 0.1), name='embed_word_W')

    if bias_init_vector is not None:
        self.embed_word_b = tf.Variable(bias_init_vector.astype(np.float32), name='embed_word_b')
    else:
        self.embed_word_b = self.init_bias(n_words, name='embed_word_b')
def test_get_expected_feature_map_shapes_with_embedded_ssd_mobilenet_v1(
        self):
    """Check the output shapes for the embedded SSD MobileNet V1 layout."""
    input_specs = (
        ('Conv2d_11_pointwise', [4, 16, 16, 512]),
        ('Conv2d_13_pointwise', [4, 8, 8, 1024]),
    )
    image_features = {
        layer: tf.random_uniform(shape, dtype=tf.float32)
        for layer, shape in input_specs
    }
    feature_maps = feature_map_generators.multi_resolution_feature_maps(
        feature_map_layout=EMBEDDED_SSD_MOBILENET_V1_LAYOUT,
        depth_multiplier=1,
        min_depth=32,
        insert_1x1_conv=True,
        image_features=image_features)

    # The two inputs keep their shapes; three further maps are appended.
    expected_feature_map_shapes = {
        'Conv2d_11_pointwise': (4, 16, 16, 512),
        'Conv2d_13_pointwise': (4, 8, 8, 1024),
        'Conv2d_13_pointwise_2_Conv2d_2_3x3_s2_512': (4, 4, 4, 512),
        'Conv2d_13_pointwise_2_Conv2d_3_3x3_s2_256': (4, 2, 2, 256),
        'Conv2d_13_pointwise_2_Conv2d_4_2x2_s2_256': (4, 1, 1, 256)}

    init_op = tf.global_variables_initializer()
    with self.test_session() as sess:
        sess.run(init_op)
        out_feature_maps = sess.run(feature_maps)
        out_feature_map_shapes = {
            name: fmap.shape for name, fmap in out_feature_maps.items()}
        self.assertDictEqual(out_feature_map_shapes, expected_feature_map_shapes)
def initialize_mod_binary_MERA(phys_dim, chi, dtype=tf.float64):
    """
    Initialize the tensors of a modified binary MERA.

    Parameters:
    -------------------
    phys_dim:         int
                      Hilbert space dimension of the bottom layer
    chi:              int
                      maximum bond dimension
    dtype:            tensorflow dtype
                      dtype of the MERA tensors
    Returns:
    -------------------
    (wC, vC, uC, rhoAB, rhoBA)
    wC, vC, uC:      list of tf.Tensor
    rhoAB, rhoBA:    tf.Tensor
    """
    # Random bottom-layer tensors: two rank-3 tensors and one rank-4 tensor.
    rank3 = [phys_dim] * 3
    rank4 = [phys_dim] * 4
    w_bottom = tf.random_uniform(shape=rank3, dtype=dtype)
    v_bottom = tf.random_uniform(shape=rank3, dtype=dtype)
    u_bottom = tf.random_uniform(shape=rank4, dtype=dtype)
    wC, vC, uC = increase_bond_dimension_by_adding_layers(
        chi_new=chi, wC=[w_bottom], vC=[v_bottom], uC=[u_bottom])

    # Both reduced density matrices start as an identity matrix reshaped to
    # four legs whose size is read from the top-layer tensor.
    chi_top = wC[-1].shape[2]
    rho_shape = (chi_top, chi_top, chi_top, chi_top)
    rhoAB = tf.reshape(tf.eye(chi_top * chi_top, dtype=dtype), rho_shape)
    rhoBA = tf.reshape(tf.eye(chi_top * chi_top, dtype=dtype), rho_shape)
    return wC, vC, uC, rhoAB, rhoBA
def testUnit4(self):
    """`revnet.unit` for block 4 with stride 2 maps 19x19x1024 to 10x10x1664."""
    x1 = tf.random_uniform([1, 19, 19, 1024])
    x2 = tf.random_uniform([1, 19, 19, 1024])
    x1, x2 = revnet.unit(x1, x2, block_num=4, depth=416, num_layers=1, stride=2)
    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(x1.get_shape().as_list(), [1, 10, 10, 1664])
    self.assertEqual(x2.get_shape().as_list(), [1, 10, 10, 1664])
def testUnit1(self):
    """`revnet.unit` for block 1 keeps the [4, 74, 74, 256] input shape."""
    x1 = tf.random_uniform([4, 74, 74, 256])
    x2 = tf.random_uniform([4, 74, 74, 256])
    x1, x2 = revnet.unit(x1, x2, block_num=1, depth=64,
                         first_batch_norm=True, num_layers=1)
    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(x1.get_shape().as_list(), [4, 74, 74, 256])
    self.assertEqual(x2.get_shape().as_list(), [4, 74, 74, 256])
def testUnit3(self):
    """`revnet.unit` for block 3 with stride 2 maps 37x37x512 to 19x19x1024."""
    x1 = tf.random_uniform([1, 37, 37, 512])
    x2 = tf.random_uniform([1, 37, 37, 512])
    x1, x2 = revnet.unit(x1, x2, block_num=3, depth=256, num_layers=10, stride=2)
    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(x1.get_shape().as_list(), [1, 19, 19, 1024])
    self.assertEqual(x2.get_shape().as_list(), [1, 19, 19, 1024])
def benchmarkEagerLinearRegression(self):
    """Time `linear_regression.fit` on a synthetic dataset and report it."""
    num_batches = 200
    batch_size = 64
    true_w = tf.random_uniform([3, 1])
    true_b = tf.random_uniform([1])
    dataset = linear_regression.synthetic_dataset(
        w=true_w,
        b=true_b,
        noise_level=0.01,
        batch_size=batch_size,
        num_batches=num_batches)
    burn_in_dataset = dataset.take(10)

    model = linear_regression.LinearModel()

    with tf.device(device()):
        optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.1)

        # Perform burn-in on a few batches so that the timed run below is
        # not the first call.
        linear_regression.fit(model, burn_in_dataset, optimizer)

        started = time.time()
        linear_regression.fit(model, dataset, optimizer)
        wall_time = time.time() - started

        examples_per_sec = num_batches * batch_size / wall_time
        device_tag = "gpu" if tfe.num_gpus() > 0 else "cpu"
        self.report_benchmark(
            name="eager_train_%s" % device_tag,
            iters=num_batches,
            extras={"examples_per_sec": examples_per_sec},
            wall_time=wall_time)
def get_online_sequences(sequence_length, batch_size):
    """Build tensors that yield fresh random examples on every evaluation.

    Each example has a channel of uniform random values and a marker channel
    with one marked position in each half of the sequence; the target is
    the sum of the random values at the two marked positions.

    Args:
        sequence_length: total length of the sequences.
        batch_size: how many at a time.

    Returns:
        (data, targets): data is `[sequence_length, batch_size, 2]` and
            targets are `[batch_size]`.
    """
    half_length = sequence_length // 2

    # Getting the random channel is easy.
    random_data = tf.random_uniform([sequence_length, batch_size, 1],
                                    minval=0.0, maxval=1.0)

    # Now we need a random marker in each half of the data.
    first_half_index = tf.random_uniform([1, batch_size], minval=0,
                                         maxval=half_length, dtype=tf.int32)
    second_half_index = tf.random_uniform([1, batch_size], minval=0,
                                          maxval=half_length, dtype=tf.int32)
    one_hot_halves = [tf.one_hot(first_half_index, half_length),
                      tf.one_hot(second_half_index, half_length)]
    markers = tf.transpose(tf.concat(axis=2, values=one_hot_halves))

    # Only the marked positions survive the product.
    targets = tf.reduce_sum(random_data * markers, axis=0)
    data = tf.concat(axis=2, values=[random_data, markers])
    return data, tf.squeeze(targets)
def testUnit3D(self):
    """`revnet.unit` in '3d' mode with stride 2 halves each spatial dim."""
    x1 = tf.random_uniform([4, 74, 74, 74, 256])
    x2 = tf.random_uniform([4, 74, 74, 74, 256])
    x1, x2 = revnet.unit(x1, x2, block_num=5, depth=128, num_layers=1,
                         dim='3d', stride=2)
    # `assertEquals` is a deprecated alias that was removed in Python 3.12.
    self.assertEqual(x1.get_shape().as_list(), [4, 37, 37, 37, 512])
    self.assertEqual(x2.get_shape().as_list(), [4, 37, 37, 37, 512])
def __init__(self, dh, dq, da, di, max_q, Nq, Na, cell='rnn', trainable_embeddings=True):
    """Store the sizes and create the model's variables and recurrent cell.

    Args:
        dh: number of units of the recurrent cell.
        dq: width of the question embedding (`qemb_w` is `[Nq, dq]`).
        da: stored on the instance.
        di: input size of the `Wi` projection (`Wi` is `[di, dq]`).
        max_q: stored on the instance.
        Nq: number of rows of the question embedding.
        Na: number of answer outputs (`aemb_w` is `[dh, Na]`).
        cell: recurrent cell type, one of 'rnn', 'lstm' or 'gru'.
        trainable_embeddings: whether `qemb_w` is trainable.

    Raises:
        NotImplementedError: if `cell` is not a supported type.
    """
    self.dh = dh
    self.dq = dq
    self.da = da
    self.di = di
    self.max_q = max_q
    self.Nq = Nq
    self.Na = Na
    self.cell = cell

    with tf.device('/cpu:0'):
        self.qemb_W = tf.get_variable('qemb_w', initializer=tf.random_uniform([self.Nq, self.dq], -0.1, 0.1), trainable=trainable_embeddings)
    self.aemb_W = tf.get_variable(name='aemb_w', initializer=tf.random_uniform([self.dh, self.Na], -0.1, 0.1))
    self.aemb_b = tf.get_variable(name='aemb_b', initializer=tf.zeros([self.Na]))
    self.Wi = tf.get_variable(name='Wi', shape=[self.di, self.dq],
                              initializer=tf.contrib.layers.xavier_initializer())
    self.bi = tf.get_variable(name='bi', initializer=tf.zeros([self.dq]))

    if self.cell == 'rnn':
        # `RNNCell` is only the abstract base class and cannot be run;
        # `BasicRNNCell` is the concrete vanilla RNN cell.
        self.recur = tf.nn.rnn_cell.BasicRNNCell(self.dh)
    elif self.cell == 'lstm':
        self.recur = tf.nn.rnn_cell.LSTMCell(self.dh)
    elif self.cell == 'gru':
        self.recur = tf.nn.rnn_cell.GRUCell(self.dh)
    else:
        raise NotImplementedError(
            "Unsupported cell type: {!r}".format(self.cell))
def test_horovod_allreduce_error(self):
    """Test that the allreduce raises an error if different ranks try to
    send tensors of different rank or dimension."""
    hvd.init()
    rank = hvd.rank()
    size = hvd.size()

    # This test does not apply if there is only one worker.
    if size == 1:
        return

    with self.test_session() as session:
        def expect_allreduce_failure(dims):
            # Build a random tensor of the given shape and require that
            # reducing it across the workers fails.
            tf.set_random_seed(1234)
            tensor = tf.random_uniform(dims, -1.0, 1.0)
            with self.assertRaises(tf.errors.FailedPreconditionError):
                session.run(hvd.allreduce(tensor))

        # Same rank, different dimension
        expect_allreduce_failure([17 + rank] * 3)

        # Same number of elements, different rank
        expect_allreduce_failure([17, 23 * 57] if rank == 0 else [17, 23, 57])
import tensorflow as tf

# Session used to run the initializer and read the variable back.
sess = tf.Session()

# A 4x4 tensor of uniform random values in [0, 1), used as the variable's
# initial value. Printing shows the symbolic tensor, not its values.
my_tensor = tf.random_uniform((4, 4), minval=0, maxval=1)
print(my_tensor)

my_var = tf.Variable(my_tensor)
print(my_var)

# The variable only holds a value after the initializer op has been run,
# so initialize before evaluating it.
init = tf.global_variables_initializer()
sess.run(init)
print(sess.run(my_var))

# Float placeholder with 5 columns and an unspecified number of rows.
ph = tf.placeholder(tf.float32, shape=(None, 5))
import tensorflow as tf
import numpy as np

# Logistic-regression graph built from the data in 'logisticTrain.txt'.
# The file is loaded with unpack=True, so `xy` is indexed by column of the
# file: every entry but the last is a feature, the last one is the label.
xy = np.loadtxt('logisticTrain.txt', unpack=True, dtype='float32')
x_data = xy[0:-1]
y_data = xy[-1]

# Placeholders for the features and the labels.
X = tf.placeholder(tf.float32)
Y = tf.placeholder(tf.float32)

# One weight per feature row, initialised uniformly in [-1, 1).
W = tf.Variable(tf.random_uniform([1, len(x_data)], -1.0, 1.0))

# Our hypothesis: the sigmoid 1 / (1 + exp(-W X)).
h = tf.matmul(W, X)
hypothesis = tf.div(1., 1 + tf.exp(-h))

# Cost function: mean binary cross-entropy between Y and the hypothesis.
cost = -tf.reduce_mean(Y * tf.log(hypothesis) + (1 - Y) * tf.log(1 - hypothesis))

# Minimize the cost with gradient descent.
a = tf.Variable(0.1) # Learning rate, alpha
optimizer = tf.train.GradientDescentOptimizer(a)
train = optimizer.minimize(cost)

# Before starting, initialize the variables. We will `run` this first.
init = tf.initialize_all_variables()

# Launch the graph.
with tf.Session() as sess:
def main():
    """Build the training graph, create the session and run train/test.

    Everything is driven by FLAGS:
      1. On Horovod rank 0, create the log directory and the logger.
      2. Select the dataset, input placeholders and model for FLAGS.dataset.
      3. For each of the FLAGS.num_gpus input splits, build the energy of
         the real inputs, a tf.while_loop that repeatedly updates the noise
         inputs, and (when FLAGS.train is set) the loss and its gradients.
      4. Create the session, initialize variables, optionally restore a
         checkpoint on rank 0, broadcast variables from rank 0, then call
         train() when FLAGS.train is set and always call test().
    """
    print("Local rank: ", hvd.local_rank(), hvd.size())

    logdir = osp.join(FLAGS.logdir, FLAGS.exp)
    # Only rank 0 creates the log directory and owns a logger.
    if hvd.rank() == 0:
        if not osp.exists(logdir):
            os.makedirs(logdir)
        logger = TensorBoardOutputFormat(logdir)
    else:
        logger = None

    LABEL = None
    print("Loading data...")
    if FLAGS.dataset == 'cifar10':
        dataset = Cifar10(augment=FLAGS.augment, rescale=FLAGS.rescale)
        test_dataset = Cifar10(train=False, rescale=FLAGS.rescale)
        channel_num = 3

        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        # Model size is chosen by the first flag that is set.
        if FLAGS.large_model:
            model = ResNet32Large(
                num_channels=channel_num,
                num_filters=128,
                train=True)
        elif FLAGS.larger_model:
            model = ResNet32Larger(
                num_channels=channel_num,
                num_filters=128)
        elif FLAGS.wider_model:
            model = ResNet32Wider(
                num_channels=channel_num,
                num_filters=192)
        else:
            model = ResNet32(
                num_channels=channel_num,
                num_filters=128)

    elif FLAGS.dataset == 'imagenet':
        dataset = Imagenet(train=True)
        test_dataset = Imagenet(train=False)
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 32, 32, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet32Wider(
            num_channels=channel_num,
            num_filters=256)

    elif FLAGS.dataset == 'imagenetfull':
        # No `dataset` object is created in this branch; a dedicated loader
        # is built further down instead.
        channel_num = 3
        X_NOISE = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 128, 128, 3), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 1000), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 1000), dtype=tf.float32)

        model = ResNet128(
            num_channels=channel_num,
            num_filters=64)

    elif FLAGS.dataset == 'mnist':
        dataset = Mnist(rescale=FLAGS.rescale)
        test_dataset = dataset
        channel_num = 1
        X_NOISE = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 28, 28), dtype=tf.float32)
        LABEL = tf.placeholder(shape=(None, 10), dtype=tf.float32)
        LABEL_POS = tf.placeholder(shape=(None, 10), dtype=tf.float32)

        model = MnistNet(
            num_channels=channel_num,
            num_filters=FLAGS.num_filters)

    elif FLAGS.dataset == 'dsprites':
        dataset = DSprites(
            cond_shape=FLAGS.cond_shape,
            cond_size=FLAGS.cond_size,
            cond_pos=FLAGS.cond_pos,
            cond_rot=FLAGS.cond_rot)
        test_dataset = dataset
        channel_num = 1

        X_NOISE = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)
        X = tf.placeholder(shape=(None, 64, 64), dtype=tf.float32)

        # The label width depends on which conditioning flag is set first.
        if FLAGS.dpos_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.dsize_only:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.drot_only:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_size:
            LABEL = tf.placeholder(shape=(None, 1), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 1), dtype=tf.float32)
        elif FLAGS.cond_shape:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)
        elif FLAGS.cond_pos:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        elif FLAGS.cond_rot:
            LABEL = tf.placeholder(shape=(None, 2), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 2), dtype=tf.float32)
        else:
            LABEL = tf.placeholder(shape=(None, 3), dtype=tf.float32)
            LABEL_POS = tf.placeholder(shape=(None, 3), dtype=tf.float32)

        model = DspritesNet(
            num_channels=channel_num,
            num_filters=FLAGS.num_filters,
            cond_size=FLAGS.cond_size,
            cond_shape=FLAGS.cond_shape,
            cond_pos=FLAGS.cond_pos,
            cond_rot=FLAGS.cond_rot)

    print("Done loading...")

    if FLAGS.dataset == "imagenetfull":
        # In the case of full imagenet, use custom_tensorflow dataloader
        data_loader = TFImagenetLoader('train', FLAGS.batch_size, hvd.rank(), hvd.size(), rescale=FLAGS.rescale)
    else:
        data_loader = DataLoader(
            dataset,
            batch_size=FLAGS.batch_size,
            num_workers=FLAGS.data_workers,
            drop_last=True,
            shuffle=True)

    batch_size = FLAGS.batch_size

    weights = [model.construct_weights('context_0')]

    Y = tf.placeholder(shape=(None), dtype=tf.int32)

    # Variables to run in training: one slice of every input per GPU.
    X_SPLIT = tf.split(X, FLAGS.num_gpus)
    X_NOISE_SPLIT = tf.split(X_NOISE, FLAGS.num_gpus)
    LABEL_SPLIT = tf.split(LABEL, FLAGS.num_gpus)
    LABEL_POS_SPLIT = tf.split(LABEL_POS, FLAGS.num_gpus)
    LABEL_SPLIT_INIT = list(LABEL_SPLIT)
    tower_grads = []
    tower_gen_grads = []
    x_mod_list = []

    optimizer = AdamOptimizer(FLAGS.lr, beta1=0.0, beta2=0.999)
    optimizer = hvd.DistributedOptimizer(optimizer)

    for j in range(FLAGS.num_gpus):

        if FLAGS.model_cclass:
            # Evaluate every real input under each of 10 one-hot labels and
            # sample a label from the resulting energies.
            ind_batch_size = FLAGS.batch_size // FLAGS.num_gpus
            # NOTE(review): this label table has FLAGS.batch_size * 10 rows
            # while x_split below has ind_batch_size * 10 rows; the two only
            # agree when FLAGS.num_gpus == 1 - confirm.
            label_tensor = tf.Variable(
                tf.convert_to_tensor(
                    np.reshape(
                        np.tile(np.eye(10), (FLAGS.batch_size, 1, 1)),
                        (FLAGS.batch_size * 10, 10)),
                    dtype=tf.float32),
                trainable=False,
                dtype=tf.float32)
            # Repeat each input 10 times (the reshape hard-codes 32x32x3).
            x_split = tf.tile(
                tf.reshape(
                    X_SPLIT[j], (ind_batch_size, 1, 32, 32, 3)), (1, 10, 1, 1, 1))
            x_split = tf.reshape(x_split, (ind_batch_size * 10, 32, 32, 3))
            energy_pos = model.forward(
                x_split,
                weights[0],
                label=label_tensor,
                stop_at_grad=False)

            energy_pos_full = tf.reshape(energy_pos, (ind_batch_size, 10))
            energy_partition_est = tf.reduce_logsumexp(
                energy_pos_full, axis=1, keepdims=True)
            # Gumbel-max sampling: argmax of (-energy + Gumbel noise).
            uniform = tf.random_uniform(tf.shape(energy_pos_full))
            label_tensor = tf.argmax(-energy_pos_full - tf.log(-tf.log(uniform)) - energy_partition_est, axis=1)
            label = tf.one_hot(label_tensor, 10, dtype=tf.float32)
            label = tf.Print(label, [label_tensor, energy_pos_full])
            # The sampled labels replace this split's labels from here on.
            LABEL_SPLIT[j] = label
            energy_pos = tf.concat(energy_pos, axis=0)
        else:
            energy_pos = [
                model.forward(
                    X_SPLIT[j],
                    weights[0],
                    label=LABEL_POS_SPLIT[j],
                    stop_at_grad=False)]
            energy_pos = tf.concat(energy_pos, axis=0)

        print("Building graph...")
        x_mod = x_orig = X_NOISE_SPLIT[j]

        x_grads = []

        energy_negs = []
        loss_energys = []

        # Energy of the starting noise inputs, before any update step.
        energy_negs.extend([model.forward(tf.stop_gradient(
            x_mod), weights[0], label=LABEL_SPLIT[j], stop_at_grad=False, reuse=True)])
        eps_begin = tf.zeros(1)

        steps = tf.constant(0)
        # Loop condition: run FLAGS.num_steps update steps.
        c = lambda i, x: tf.less(i, FLAGS.num_steps)

        def langevin_step(counter, x_mod):
            # One update: add Gaussian noise, then move against the energy
            # gradient (or take an HMC step), then clip to the valid range.
            x_mod = x_mod + tf.random_normal(tf.shape(x_mod),
                                             mean=0.0,
                                             stddev=0.005 * FLAGS.rescale * FLAGS.noise_scale)

            energy_noise = energy_start = tf.concat(
                [model.forward(
                    x_mod,
                    weights[0],
                    label=LABEL_SPLIT[j],
                    reuse=True,
                    stop_at_grad=False,
                    stop_batch=True)],
                axis=0)

            x_grad, label_grad = tf.gradients(
                FLAGS.temperature * energy_noise, [x_mod, LABEL_SPLIT[j]])
            energy_noise_old = energy_noise

            lr = FLAGS.step_lr

            # Optionally bound the gradient by L2 norm ('l2') or per-element
            # value ('li'); any other type aborts.
            if FLAGS.proj_norm != 0.0:
                if FLAGS.proj_norm_type == 'l2':
                    x_grad = tf.clip_by_norm(x_grad, FLAGS.proj_norm)
                elif FLAGS.proj_norm_type == 'li':
                    x_grad = tf.clip_by_value(
                        x_grad, -FLAGS.proj_norm, FLAGS.proj_norm)
                else:
                    print("Other types of projection are not supported!!!")
                    assert False

            # Clip gradient norm for now
            if FLAGS.hmc:
                # Step size should be tuned to get around 65% acceptance
                def energy(x):
                    return FLAGS.temperature * \
                        model.forward(x, weights[0], label=LABEL_SPLIT[j], reuse=True)

                x_last = hmc(x_mod, 15., 10, energy)
            else:
                x_last = x_mod - (lr) * x_grad

            x_mod = x_last
            # Keep the samples inside [0, FLAGS.rescale].
            x_mod = tf.clip_by_value(x_mod, 0, FLAGS.rescale)

            counter = counter + 1

            return counter, x_mod

        steps, x_mod = tf.while_loop(c, langevin_step, (steps, x_mod))

        energy_eval = model.forward(x_mod, weights[0], label=LABEL_SPLIT[j],
                                    stop_at_grad=False, reuse=True)
        x_grad = tf.gradients(FLAGS.temperature * energy_eval, [x_mod])[0]
        x_grads.append(x_grad)

        # Energy of the final samples, with gradients blocked through x_mod.
        energy_negs.append(
            model.forward(
                tf.stop_gradient(x_mod),
                weights[0],
                label=LABEL_SPLIT[j],
                stop_at_grad=False,
                reuse=True))

        test_x_mod = x_mod

        temp = FLAGS.temperature

        energy_neg = energy_negs[-1]
        # Mean absolute difference between the final samples and real inputs.
        x_off = tf.reduce_mean(
            tf.abs(x_mod[:tf.shape(X_SPLIT[j])[0]] - X_SPLIT[j]))

        # NOTE(review): this call passes the full LABEL placeholder rather
        # than LABEL_SPLIT[j] like the other forward calls - confirm intent.
        loss_energy = model.forward(
            x_mod,
            weights[0],
            reuse=True,
            label=LABEL,
            stop_grad=True)

        print("Finished processing loop construction ...")

        # Tensors exposed to train()/test(); rebuilt on every loop iteration.
        target_vars = {}

        if FLAGS.cclass or FLAGS.model_cclass:
            # Entropy of the label distribution of the first split.
            label_sum = tf.reduce_sum(LABEL_SPLIT[0], axis=0)
            label_prob = label_sum / tf.reduce_sum(label_sum)
            label_ent = -tf.reduce_sum(label_prob * tf.math.log(label_prob + 1e-7))
        else:
            label_ent = tf.zeros(1)

        target_vars['label_ent'] = label_ent

        if FLAGS.train:

            # NOTE(review): loss_ml is only assigned for the three objectives
            # below; any other FLAGS.objective leaves it undefined.
            if FLAGS.objective == 'logsumexp':
                pos_term = temp * energy_pos
                energy_neg_reduced = (energy_neg - tf.reduce_min(energy_neg))
                coeff = tf.stop_gradient(tf.exp(-temp * energy_neg_reduced))
                norm_constant = tf.stop_gradient(tf.reduce_sum(coeff)) + 1e-4
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = coeff * (-1 * temp * energy_neg) / norm_constant
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'cd':
                pos_loss = tf.reduce_mean(temp * energy_pos)
                neg_loss = -tf.reduce_mean(temp * energy_neg)
                loss_ml = FLAGS.ml_coeff * (pos_loss + tf.reduce_sum(neg_loss))
            elif FLAGS.objective == 'softplus':
                loss_ml = FLAGS.ml_coeff * \
                    tf.nn.softplus(temp * (energy_pos - energy_neg))

            loss_total = tf.reduce_mean(loss_ml)

            if not FLAGS.zero_kl:
                loss_total = loss_total + tf.reduce_mean(loss_energy)

            # L2 penalty on the magnitude of both energies.
            loss_total = loss_total + \
                FLAGS.l2_coeff * (tf.reduce_mean(tf.square(energy_pos)) + tf.reduce_mean(tf.square((energy_neg))))

            print("Started gradient computation...")
            gvs = optimizer.compute_gradients(loss_total)
            # Drop the pairs whose first element is None.
            gvs = [(k, v) for (k, v) in gvs if k is not None]

            print("Applying gradients...")

            # Gradients are only collected here; they are applied once,
            # after the loop over splits.
            tower_grads.append(gvs)

            print("Finished applying gradients.")

            target_vars['loss_ml'] = loss_ml
            target_vars['total_loss'] = loss_total
            target_vars['loss_energy'] = loss_energy
            target_vars['weights'] = weights
            target_vars['gvs'] = gvs

        target_vars['X'] = X
        target_vars['Y'] = Y
        target_vars['LABEL'] = LABEL
        target_vars['LABEL_POS'] = LABEL_POS
        target_vars['X_NOISE'] = X_NOISE
        target_vars['energy_pos'] = energy_pos
        target_vars['energy_start'] = energy_negs[0]

        if len(x_grads) >= 1:
            target_vars['x_grad'] = x_grads[-1]
            target_vars['x_grad_first'] = x_grads[0]
        else:
            target_vars['x_grad'] = tf.zeros(1)
            target_vars['x_grad_first'] = tf.zeros(1)

        target_vars['x_mod'] = x_mod
        target_vars['x_off'] = x_off
        target_vars['temp'] = temp
        target_vars['energy_neg'] = energy_neg
        target_vars['test_x_mod'] = test_x_mod
        target_vars['eps_begin'] = eps_begin

    if FLAGS.train:
        # Average the collected gradients and build the training op.
        grads = average_gradients(tower_grads)
        train_op = optimizer.apply_gradients(grads)
        target_vars['train_op'] = train_op

    config = tf.ConfigProto()

    # With several Horovod processes, each one only sees its local GPU.
    if hvd.size() > 1:
        config.gpu_options.visible_device_list = str(hvd.local_rank())

    sess = tf.Session(config=config)

    saver = loader = tf.train.Saver(
        max_to_keep=30, keep_checkpoint_every_n_hours=6)

    # Count the trainable parameters as the product of each variable's dims.
    total_parameters = 0
    for variable in tf.trainable_variables():
        # shape is an array of tf.Dimension
        shape = variable.get_shape()
        variable_parameters = 1
        for dim in shape:
            variable_parameters *= dim.value
        total_parameters += variable_parameters
    print("Model has a total of {} parameters".format(total_parameters))

    sess.run(tf.global_variables_initializer())

    resume_itr = 0

    # Only rank 0 restores a checkpoint; the broadcast below then copies the
    # variables from rank 0 to every other rank.
    if (FLAGS.resume_iter != -1 or not FLAGS.train) and hvd.rank() == 0:
        model_file = osp.join(logdir, 'model_{}'.format(FLAGS.resume_iter))
        resume_itr = FLAGS.resume_iter
        # saver.restore(sess, model_file)
        optimistic_restore(sess, model_file)

    sess.run(hvd.broadcast_global_variables(0))
    print("Initializing variables...")

    print("Start broadcast")
    print("End broadcast")

    if FLAGS.train:
        train(target_vars, saver, sess, logger, data_loader, resume_itr, logdir)

    test(target_vars, saver, sess, logger, data_loader)
def build_graph(self):
    """Construct the skip-gram training graph and keep its nodes on self."""
    opts = self._options

    # The training data is a text file read by the skipgram op.
    skipgram_outputs = word2vec.skipgram(filename=opts.train_data,
                                         batch_size=opts.batch_size,
                                         window_size=opts.window_size,
                                         min_count=opts.min_count,
                                         subsample=opts.subsample)
    (vocab, vocab_freqs, epoch_words, epoch, processed_words,
     examples, labels) = skipgram_outputs

    # Evaluate the vocabulary once and cache it on the options object.
    vocab_values = self._session.run([vocab, vocab_freqs, epoch_words])
    (opts.vocab_words, opts.vocab_counts,
     opts.words_per_epoch) = vocab_values
    opts.vocab_size = len(opts.vocab_words)
    print("Data file: ", opts.train_data)
    print("Vocab size: ", opts.vocab_size - 1, " + UNK")
    print("Words per epoch: ", opts.words_per_epoch)

    # Build the word -> id lookup from the id -> word list.
    self._id2word = opts.vocab_words
    for word_id, word in enumerate(self._id2word):
        self._word2id[word] = word_id

    # Input word embeddings: [vocab_size, emb_dim], small uniform init.
    init_width = 0.5 / opts.emb_dim
    w_in = tf.Variable(
        tf.random_uniform([opts.vocab_size, opts.emb_dim],
                          -init_width, init_width),
        name="w_in")

    # Output word weights: [vocab_size, emb_dim], initialised to zero.
    w_out = tf.Variable(tf.zeros([opts.vocab_size, opts.emb_dim]),
                        name="w_out")

    # Global step: scalar, i.e., shape [].
    global_step = tf.Variable(0, name="global_step")

    # Linear learning-rate decay, never below 0.0001 of the base rate.
    words_to_train = float(opts.words_per_epoch * opts.epochs_to_train)
    progress = tf.cast(processed_words, tf.float32) / words_to_train
    lr = opts.learning_rate * tf.maximum(0.0001, 1.0 - progress)

    # Training nodes: the step counter is bumped before each update.
    inc = global_step.assign_add(1)
    with tf.control_dependencies([inc]):
        train = word2vec.neg_train(w_in,
                                   w_out,
                                   examples,
                                   labels,
                                   lr,
                                   vocab_count=opts.vocab_counts.tolist(),
                                   num_negative_samples=opts.num_samples)

    self._w_in = w_in
    self._examples = examples
    self._labels = labels
    self._lr = lr
    self._train = train
    self.step = global_step
    self._epoch = epoch
    self._words = processed_words
def mul_temperature(logits_BxN, temperature):
    """Return the logits plus Gumbel noise scaled by `temperature`.

    The noise is -log(-log(u)) with u drawn uniformly, one value per logit.
    """
    uniform_BxN = tf.random_uniform(tf.shape(logits_BxN))
    gumbel_BxN = -tf.log(-tf.log(uniform_BxN))
    logits_BxN += gumbel_BxN * temperature
    return logits_BxN
def inference(batch_placeholders, similarity_placeholder, init_word_embeds,
              word_to_num, num_to_word):
    """Score one positive and three negative answers against a question.

    Returns a list ``[score_pos, score_neg1, score_neg2, score_neg3]``.
    ``similarity_placeholder``, ``word_to_num`` and ``num_to_word`` are
    accepted but not used here.
    """
    print("Begin inference:")
    print("Creating variables")
    E = tf.Variable(init_word_embeds, dtype=tf.float32)
    # NOTE(review): name='W' is given to the initializer op, not to the
    # variable itself (unlike `b` below) - confirm whether that is intended.
    W = tf.Variable(
        tf.random_uniform(
            [params.lstm_size, params.lstm_size, params.slice_size],
            minval=-1.0 / params.lstm_size,
            maxval=1.0 / params.lstm_size,
            name='W'))
    V = tf.Variable(
        tf.random_uniform([params.slice_size, 2 * params.lstm_size],
                          minval=-1.0 / (2 * params.lstm_size),
                          maxval=1.0 / (2 * params.lstm_size)))
    b = tf.Variable(tf.zeros([1, params.slice_size]), name='b')
    U = tf.Variable(
        tf.random_uniform([1, params.slice_size],
                          minval=-1.0 / params.slice_size,
                          maxval=1.0 / params.slice_size))
    lstm = createLSTM(params.lstm_size)

    print("Calcing sentences2vec")
    # Column 0 is the question, column 1 the positive answer and the
    # remaining three columns the negative answers.
    question_ids, pos_ids, neg1_ids, neg2_ids, neg3_ids = tf.split(
        1, params.corrupt_size + 2, batch_placeholders)
    question_ids, pos_ids, neg1_ids, neg2_ids, neg3_ids = [
        tf.squeeze(column)
        for column in (question_ids, pos_ids, neg1_ids, neg2_ids, neg3_ids)
    ]

    # Encode every sentence with the shared LSTM; the question is encoded
    # with the last flag False, every answer with it True.
    question_vec = train_sentence2vectorLSTM(lstm, E, question_ids, False)
    answer_vecs = [
        train_sentence2vectorLSTM(lstm, E, answer_ids, True)
        for answer_ids in (pos_ids, neg1_ids, neg2_ids, neg3_ids)
    ]

    def _score(answer_vec):
        # Bilinear term: one question * W[:, :, k] * answer value per slice.
        slices = [
            tf.reduce_sum(
                answer_vec * tf.matmul(question_vec, W[:, :, k]), 1)
            for k in range(params.slice_size)
        ]
        bilinear = tf.pack(slices)
        # Linear term: V applied to the concatenated [question, answer].
        linear = tf.transpose(
            tf.matmul(
                V, tf.transpose(tf.concat(1, [question_vec, answer_vec]))))
        hidden = tf.nn.relu(tf.transpose(bilinear) + linear + b)
        return tf.matmul(hidden, tf.transpose(U))

    # Scores are built in the order: positive, then the three negatives.
    score_pos = _score(answer_vecs[0])
    negative = [_score(neg_vec) for neg_vec in answer_vecs[1:]]
    return [score_pos] + negative
def adjective_embeddings(data_file, embeddings_file_name, num_steps, embedding_dim):
    """Train skip-gram embeddings with an NCE loss and write them to a file.

    Args:
        data_file: Path to a pickle holding the tuple
            ``(dictionary, reverse_dictionary, read_data, read_label)``.
        embeddings_file_name: Path of the text file to write. Its first line
            is ``"<number of embeddings> <embedding size>"``; every following
            line is a word followed by its space-separated vector values
            rounded to 6 decimals.
        num_steps: Number of training steps to run.
        embedding_dim: Dimension of each embedding vector.

    The vocabulary size is read from the module-level ``vocabulary_size``.
    """
    # Specification of training data:
    batch_size = 64  # Size of mini-batch for skip-gram model.
    embedding_size = embedding_dim  # Dimension of the embedding vector.
    num_sampled = 200  # Sample size for negative examples.
    logs_path = './log/'
    learning_rate_ = 0.01

    # Specification of test sample:
    sample_size = 20  # Random sample of words to evaluate similarity.
    sample_window = 20  # Only pick samples in the head of the distribution.
    sample_examples = np.random.choice(sample_window, sample_size, replace=False)

    # NOTE: pickle can execute arbitrary code while loading; only pass files
    # from a trusted source. The handle is closed as soon as loading is done.
    with open(data_file, 'rb') as f:
        dictionary, reverse_dictionary, read_data, read_label = pickle.load(f)
    print("ddddddd", reverse_dictionary)
    print()
    print("rrrrrrrr", read_label)
    print("wwwwwwww", read_data)

    ## Constructing the graph...
    graph = tf.Graph()
    with graph.as_default():
        with tf.device('/cpu:0'):
            # Placeholders to read input data.
            with tf.name_scope('Inputs'):
                train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
                train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])

            # Look up embeddings for inputs.
            with tf.name_scope('Embeddings'):
                sample_dataset = tf.constant(sample_examples, dtype=tf.int32)
                embeddings = tf.Variable(
                    tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
                embed = tf.nn.embedding_lookup(embeddings, train_inputs)

                # Construct the variables for the NCE loss
                nce_weights = tf.Variable(
                    tf.truncated_normal([vocabulary_size, embedding_size],
                                        stddev=1.0 / math.sqrt(embedding_size)))
                nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            # Compute the average NCE loss for the batch.
            # tf.nce_loss automatically draws a new sample of the negative
            # labels each time we evaluate the loss.
            with tf.name_scope('Loss'):
                loss = tf.reduce_mean(
                    tf.nn.nce_loss(weights=nce_weights,
                                   biases=nce_biases,
                                   labels=train_labels,
                                   inputs=embed,
                                   num_sampled=num_sampled,
                                   num_classes=vocabulary_size))

            # Construct the Adam optimizer using a learning rate of 0.01.
            with tf.name_scope('Adam_Optimizer'):
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=learning_rate_).minimize(loss)

            # Scale every embedding to unit L2 norm, so the matrix product
            # below yields cosine similarities.
            with tf.name_scope('Normalization'):
                norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
                normalized_embeddings = embeddings / norm

            sample_embeddings = tf.nn.embedding_lookup(normalized_embeddings, sample_dataset)
            similarity = tf.matmul(sample_embeddings, normalized_embeddings, transpose_b=True)

            # Add variable initializer.
            init = tf.global_variables_initializer()

            # Create a summary to monitor cost tensor
            tf.summary.scalar("cost", loss)
            # Merge all summary variables.
            merged_summary_op = tf.summary.merge_all()

    with tf.Session(graph=graph) as session:
        # We must initialize all variables before we use them.
        session.run(init)
        summary_writer = tf.summary.FileWriter(logs_path, graph=tf.get_default_graph())
        try:
            print('Initializing the model')
            length = len(read_data)
            average_loss = 0
            for step in range(num_steps):
                print(step)
                batch_inputs = np.ndarray(shape=(batch_size), dtype=np.int32)
                batch_labels = np.ndarray(shape=(batch_size, 1), dtype=np.int32)

                # Fill the batch with consecutive samples, wrapping around at
                # the end of the data.
                aa = step * batch_size % length
                for bb in range(batch_size):
                    batch_inputs[bb] = read_data[aa]
                    batch_labels[bb, 0] = read_label[aa]
                    aa = aa + 1
                    if aa == length:
                        aa = 0

                feed_dict = {train_inputs: batch_inputs, train_labels: batch_labels}

                # We perform one update step by evaluating the optimizer op
                # using session.run()
                _, loss_val, summary = session.run(
                    [optimizer, loss, merged_summary_op], feed_dict=feed_dict)

                summary_writer.add_summary(summary, step)
                average_loss += loss_val

                if step % 5000 == 0:
                    if step > 0:
                        average_loss /= 5000
                    # The average loss is an estimate of the loss over the
                    # last 5000 batches.
                    print('Average loss at step ', step, ': ', average_loss)
                    average_loss = 0

                # Evaluate similarity after every 10000 iterations.
                if step % 10000 == 0:
                    sim = similarity.eval()
                    for i in range(sample_size):
                        sample_word = reverse_dictionary[sample_examples[i]]
                        top_k = 10  # Look for top-10 neighbours for words in sample set.
                        # Index 0 of the sorted neighbours is skipped.
                        nearest = (-sim[i, :]).argsort()[1:top_k + 1]
                        print(top_k)
                        log_str = 'Nearest to %s:' % sample_word
                        for k in range(top_k):
                            print("22222222", nearest[k])
                            close_word = reverse_dictionary[nearest[k]]
                            log_str = '%s %s,' % (log_str, close_word)
                        print(log_str)
                    print()
        finally:
            # Flush pending summaries and release the event file.
            summary_writer.close()

        final_embeddings = normalized_embeddings.eval()

    with open(embeddings_file_name, 'w') as outputfile:
        # Header: number of embeddings and their dimension.
        outputfile.write('%s %s' % (len(final_embeddings), embedding_size))
        for i, vector in enumerate(final_embeddings):
            values = ''.join(
                ' ' + str(round(vector[j], 6)) for j in range(embedding_size))
            outputfile.write('\n' + reverse_dictionary[i] + values)
def train(self):
    """Build the WGAN-GP losses and optimizers and run the training loop.

    For every step the discriminator is updated five times and the generator
    once. Losses are printed every 100 steps and a figure produced by
    ``self._display()`` is saved under ``out/`` every 1000 steps. The session
    is stored on ``self.sess`` and closed when training ends or fails.
    """
    loss_dis = -tf.reduce_mean(self.D_real) + tf.reduce_mean(self.D_fake)
    loss_gen = -tf.reduce_mean(self.D_fake)

    # Gradient penalty: push the norm of the discriminator's gradient at
    # random points between real and generated samples towards 1.
    # assumes self.x / self.g are (batch, features) - TODO confirm
    alpha = tf.random_uniform(shape=[self.batch_size, 1],
                              minval=0.,
                              maxval=1.)
    differences = self.g - self.x
    interpolates = self.x + alpha * differences
    gradients = tf.gradients(self._discriminator(interpolates),
                             [interpolates])[0]
    slopes = tf.sqrt(
        tf.reduce_sum(tf.square(gradients), reduction_indices=[1]))
    gradient_penalty = tf.reduce_mean((slopes - 1.)**2)
    loss_dis += self.LAMBDA * gradient_penalty

    opt_dis = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                     beta1=0.5,
                                     beta2=0.9).minimize(
                                         loss_dis,
                                         var_list=self.params_dis)
    opt_gen = tf.train.AdamOptimizer(learning_rate=self.learning_rate,
                                     beta1=0.5,
                                     beta2=0.9).minimize(
                                         loss_gen,
                                         var_list=self.params_gen)

    init = tf.global_variables_initializer()
    self.sess = tf.Session()
    try:
        self.sess.run(init)

        disp_step_num = 1000
        if not os.path.exists('out/'):
            os.makedirs('out/')

        fig_i = 0
        for step in range(self.step_num):
            # Five discriminator updates per generator update.
            for _ in range(5):
                # Use the instance's batch size, like the rest of the method.
                xs, _labels = self.data.train.next_batch(self.batch_size)
                zs = sample_z(self.batch_size, self.z_shape)
                _, l_dis = self.sess.run([opt_dis, loss_dis],
                                         feed_dict={
                                             self.z: zs,
                                             self.x: xs
                                         })

            zs = sample_z(self.batch_size, self.z_shape)
            _, l_gen = self.sess.run([opt_gen, loss_gen],
                                     feed_dict={self.z: zs})

            if step % 100 == 0:
                print('Step: {}, loss_dis = {:.5}, loss_gen = {:.5}'.format(
                    step, l_dis, l_gen))

            if step % disp_step_num == 0:
                fig = self._display()
                plt.savefig('out/{}.png'.format(str(fig_i).zfill(3)),
                            bbox_inches='tight')
                fig_i += 1
                plt.close(fig)
    finally:
        # Release the session even if training raises.
        self.sess.close()
def word2vec(batch_gen):
    """Build the graph for the word2vec model and train it.

    Args:
        batch_gen: Iterator yielding ``(centers, targets)`` batches that are
            fed to the ``[BATCH_SIZE]`` and ``[BATCH_SIZE, 1]`` placeholders.

    The average loss is printed every SKIP_STEP steps and the graph is
    written to ``./my_graph/no_frills/``.
    """
    # Step 1: define the placeholders for input and output.
    # center_words have to be int to work on embedding lookup.
    X = tf.placeholder(tf.int32, shape=[BATCH_SIZE], name="x-placeholder")
    Y = tf.placeholder(tf.int32, shape=[BATCH_SIZE, 1], name="y-placeholder")

    # Step 2: define weights. In word2vec, it's actually the weights that we
    # care about: vocab size x embed size, initialized to random uniform
    # between -1 and 1.
    matrix = tf.Variable(tf.random_uniform([VOCAB_SIZE, EMBED_SIZE], -1.0,
                                           1.0),
                         name="matrix")

    # Step 3: define the inference.
    # Get the embed of input words using tf.nn.embedding_lookup.
    embed = tf.nn.embedding_lookup(matrix, X, name='embed')

    # Step 4: construct variables for NCE loss.
    # nce_weight (vocab size x embed size), initialized to truncated_normal
    # with stddev=1.0 / (EMBED_SIZE ** 0.5); bias: vocab size, initialized
    # to 0.
    weights = tf.Variable(tf.truncated_normal([VOCAB_SIZE, EMBED_SIZE],
                                              stddev=1.0 / (EMBED_SIZE**0.5)),
                          name="weight")
    bias = tf.Variable(tf.zeros([VOCAB_SIZE]), name="bias")

    # Define the loss as the mean NCE loss across the batch.
    nce_loss = tf.nn.nce_loss(weights=weights,
                              biases=bias,
                              labels=Y,
                              inputs=embed,
                              num_sampled=NUM_SAMPLED,
                              num_classes=VOCAB_SIZE)
    loss = tf.reduce_mean(nce_loss)

    # Step 5: define optimizer.
    optimizer = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(loss)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # Accumulates the loss over the last SKIP_STEP steps.
        total_loss = 0.0
        writer = tf.summary.FileWriter('./my_graph/no_frills/', sess.graph)
        try:
            # range() and the next() builtin work on Python 2 and Python 3,
            # unlike xrange() and the .next() method.
            for index in range(NUM_TRAIN_STEPS):
                centers, targets = next(batch_gen)
                _, loss_batch = sess.run([optimizer, loss],
                                         feed_dict={
                                             X: centers,
                                             Y: targets
                                         })
                total_loss += loss_batch
                if (index + 1) % SKIP_STEP == 0:
                    print('Average loss at step {}: {:5.1f}'.format(
                        index, total_loss / SKIP_STEP))
                    total_loss = 0.0
        finally:
            # Close the event file even if training is interrupted.
            writer.close()
import gym
import numpy as np
import random
import tensorflow as tf
import matplotlib.pyplot as plt

env = gym.make('FrozenLake-v0')

tf.reset_default_graph()

# These lines establish the feed-forward part of the network used to choose
# actions: a 1x16 input multiplied by a 16x4 weight matrix gives four values,
# and the index of the largest one is the prediction.
inputs1 = tf.placeholder(shape=[1, 16], dtype=tf.float32)
W = tf.Variable(tf.random_uniform([16, 4], 0, 0.01))
Qout = tf.matmul(inputs1, W)
predict = tf.argmax(Qout, 1)

# Below we obtain the loss by taking the sum of squares difference between
# the target and prediction Q values.
nextQ = tf.placeholder(shape=[1, 4], dtype=tf.float32)
loss = tf.reduce_sum(tf.square(nextQ - Qout))
trainer = tf.train.GradientDescentOptimizer(learning_rate=0.1)
updateModel = trainer.minimize(loss)

init = tf.global_variables_initializer()

# Set learning parameters
y = .99  # presumably the discount factor - its use is not visible here
e = 0.1  # presumably the exploration rate - its use is not visible here
num_episodes = 2000
# Create lists to contain total rewards and steps per episode
jList = []
rList = []
with tf.Session() as sess:
    sess.run(init)
    for i in range(num_episodes):
def __init__(
        self, sequence_length, num_classes, embedding_model: word2vec.WordVectors,
        filter_sizes, num_filters, l2_reg_lambda=0.0):
    """Build a text CNN: embedding, conv + max-pool per filter size, softmax.

    Args:
        sequence_length: Number of token ids in every input row.
        num_classes: Number of output classes.
        embedding_model: Word-vector model; only the shape of its
            ``vectors`` matrix (vocabulary size x embedding size) is read.
        filter_sizes: Iterable of convolution filter heights.
        num_filters: Number of filters per filter size.
        l2_reg_lambda: Weight of the L2 penalty on the output layer.
    """
    # `vectors` is a 2-D matrix; unpack both of its dimensions.
    vocab_size, embedding_size = embedding_model.vectors.shape

    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(tf.float32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # Keeping track of l2 regularization loss (optional)
    l2_loss = tf.constant(0.0)

    # Embedding layer
    # NOTE(review): the table is initialised randomly; the values in
    # embedding_model.vectors are not copied into it - confirm intent.
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        self.W = tf.Variable(
            tf.random_uniform([vocab_size, embedding_size], -1.0, 1.0),
            name="W")
        self.embedded_chars = tf.nn.embedding_lookup(self.W, self.input_x)
        # Add a trailing channel dimension, as conv2d expects 4-D input.
        self.embedded_chars_expanded = tf.expand_dims(self.embedded_chars, -1)

    # Create a convolution + maxpool layer for each filter size
    pooled_outputs = []
    for i, filter_size in enumerate(filter_sizes):
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution Layer
            filter_shape = [filter_size, embedding_size, 1, num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
            conv = tf.nn.conv2d(
                self.embedded_chars_expanded,
                W,
                strides=[1, 1, 1, 1],
                padding="VALID",
                name="conv")
            # Apply nonlinearity
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Maxpooling over the outputs: the window spans every position
            # the filter can take, leaving one value per filter.
            pooled = tf.nn.max_pool(
                h,
                ksize=[1, sequence_length - filter_size + 1, 1, 1],
                strides=[1, 1, 1, 1],
                padding='VALID',
                name="pool")
            pooled_outputs.append(pooled)

    # Combine all the pooled features
    num_filters_total = num_filters * len(filter_sizes)
    self.h_pool = tf.concat(pooled_outputs, 3)
    self.h_pool_flat = tf.reshape(self.h_pool, [-1, num_filters_total])

    # Add dropout
    with tf.name_scope("dropout"):
        self.h_drop = tf.nn.dropout(self.h_pool_flat, self.dropout_keep_prob)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_filters_total, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")
        l2_loss += tf.nn.l2_loss(W)
        l2_loss += tf.nn.l2_loss(b)
        self.scores = tf.nn.xw_plus_b(self.h_drop, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses) + l2_reg_lambda * l2_loss

    # Accuracy
    with tf.name_scope("accuracy"):
        correct_predictions = tf.equal(self.predictions, tf.argmax(self.input_y, 1))
        self.accuracy = tf.reduce_mean(tf.cast(correct_predictions, "float"), name="accuracy")
def random_phase_in_radians(shape, dtype):
    """Return a tensor of the given shape and dtype with values in [-pi, pi).

    A uniform sample u is mapped to pi * (2u - 1).
    """
    unit_uniform = tf.random_uniform(shape, dtype=dtype)
    return np.pi * (2 * unit_uniform - 1.0)
def tf_augment_input_bbox(self, stacked_points, bboxes, batch_inds, config):
    """Randomly rotate, scale and jitter stacked points and their boxes.

    Args:
        stacked_points: Points of all batch elements stacked along axis 0,
            three coordinates each (the code reshapes them to [-1, 3]).
        bboxes: Boxes indexed as [batch, box, channel]; channels ``:3`` are
            used as the box center and channels ``3:`` as height and radius.
        batch_inds: For every point, the index of its batch element; the
            last entry plus one is taken as the number of batch elements.
        config: Object providing ``augment_rotation``, ``augment_scale_min``,
            ``augment_scale_max``, ``augment_scale_anisotropic``,
            ``augment_symmetries`` and ``augment_noise``.

    Returns:
        Tuple ``(stacked_points, bboxes, s, R)``: the augmented points and
        boxes, the per-element scales and the per-element rotation matrices.

    Raises:
        ValueError: If the rotation mode is unknown or anisotropic scaling
            is requested.
    """
    # Parameter
    num_batches = batch_inds[-1] + 1

    ##########
    # Rotation
    ##########

    if config.augment_rotation == 'vertical':

        # Choose a random angle for each element
        theta = tf.random_uniform((num_batches,), minval=0, maxval=2*np.pi)

        # Rotation matrices (about the third axis)
        c, s = tf.cos(theta), tf.sin(theta)
        cs0 = tf.zeros_like(c)
        cs1 = tf.ones_like(c)
        R = tf.stack([c, -s, cs0, s, c, cs0, cs0, cs0, cs1], axis=1)
        R = tf.reshape(R, (-1, 3, 3))

        # Create N x 3 x 3 rotation matrices to multiply with stacked_points
        stacked_rots = tf.gather(R, batch_inds)

        # Apply rotations
        stacked_points = tf.reshape(tf.matmul(tf.expand_dims(stacked_points, axis=1), stacked_rots), [-1, 3])

        # Apply rotations to bboxes: centers become [batch, box, 1, 3] row
        # vectors multiplied by their element's rotation matrix.
        new_centers = tf.expand_dims(bboxes[:, :, :3], axis=2)
        tmp_R = tf.tile(tf.expand_dims(R, axis=1), tf.shape(new_centers[:1, :, :1, :1]))
        new_centers = tf.matmul(new_centers, tmp_R)
        # Remove only the axis added above, so a batch or box count of 1 is
        # not squeezed away, and keep height/radius (channels 3:) next to
        # the rotated centers.
        bboxes = tf.concat((tf.squeeze(new_centers, axis=2), bboxes[:, :, 3:]), axis=2)

    elif config.augment_rotation == 'none':
        R = tf.eye(3, batch_shape=(num_batches,))
    else:
        raise ValueError('Unknown rotation augmentation : ' + config.augment_rotation)

    #######
    # Scale
    #######

    # Choose random scales for each example
    min_s = config.augment_scale_min
    max_s = config.augment_scale_max

    if config.augment_scale_anisotropic:
        # Rejected up front: no scale tensor is built before failing.
        raise ValueError("Applying anisotropic scale augmentation to cylinders is not advised.")
    s = tf.random_uniform((num_batches, 1), minval=min_s, maxval=max_s)

    # Apply scale to height and radius before symmetries
    new_hr = bboxes[:, :, 3:] * tf.expand_dims(s, axis=2)

    if config.augment_symmetries:
        # Random sign (-1 or +1) per element and axis.
        symetries = tf.round(tf.random_uniform((num_batches, 3))) * 2 - 1
        s = s * symetries

    # Create N x 3 vector of scales to multiply with stacked_points
    stacked_scales = tf.gather(s, batch_inds)

    # Apply scales
    stacked_points = stacked_points * stacked_scales

    # Apply scale to bboxes
    new_centers = bboxes[:, :, :3] * tf.expand_dims(s, axis=1)
    bboxes = tf.concat((new_centers, new_hr), axis=2)

    #######
    # Noise
    #######

    noise = tf.random_normal(tf.shape(stacked_points), stddev=config.augment_noise)
    stacked_points = stacked_points + noise

    return stacked_points, bboxes, s, R
import tensorflow as tf
import numpy as np

# Generate phony data with NumPy: 100 points in total.
x_data = np.float32(np.random.rand(2, 100))  # random input
y_data = np.dot([0.100, 0.200], x_data) + 0.300

# Build a linear model y = W * x + b.
b = tf.Variable(tf.zeros([1]))
W = tf.Variable(tf.random_uniform([1, 2], -1.0, 1.0))
y = tf.matmul(W, x_data) + b

# Minimize the mean squared error.
loss = tf.reduce_mean(tf.square(y - y_data))
optimizer = tf.train.GradientDescentOptimizer(0.5)
train = optimizer.minimize(loss)

# Initialize the variables.
init = tf.initialize_all_variables()

# Launch the graph.
sess = tf.Session()
sess.run(init)

# Fit the plane, printing W and b every 20 steps.
for step in range(0, 201):
    sess.run(train)
    if step % 20 == 0:
        print(step, sess.run(W), sess.run(b))
def sample_gumbel(shape, eps=1e-20):
    """Draw Gumbel(0, 1) noise of the requested shape.

    Args:
        shape: shape of the tensor to sample.
        eps: small constant added inside both logarithms to keep their
            arguments away from zero.

    Returns:
        Tensor of Gumbel(0, 1) samples.
    """
    uniform = tf.random_uniform(shape, minval=0, maxval=1)
    neg_log_uniform = -tf.log(uniform + eps)
    return -tf.log(neg_log_uniform + eps)
import tensorflow as tf

# Two operands: a constant vector and a randomly initialised variable.
input1 = tf.constant([1.0, 2.0, 3.0], name='input1')
initial_value = tf.random_uniform([3])
input2 = tf.Variable(initial_value, name='input2')

operands = [input1, input2]
output = tf.add_n(operands, name='add')

# Create a summary writer and dump the current TensorFlow graph to the log
# directory.
log_dir = '/home/dengkaiting/pycharm_project/DeepLearning/tensorflow_book/logs'
graph = tf.get_default_graph()
writer = tf.summary.FileWriter(log_dir, graph)
writer.close()
def sample(logits):
    """Sample one index per row from unnormalised logits.

    Uniform noise is turned into Gumbel noise and added to the logits; the
    arg-max along axis 1 is returned.
    """
    uniform = tf.random_uniform(tf.shape(logits))
    neg_log_uniform = -tf.log(uniform)
    perturbed = logits - tf.log(neg_log_uniform)
    return tf.argmax(perturbed, 1)
def tf_augment_input(self, stacked_points, batch_inds, config):
    """Randomly rotate, scale, mirror and jitter a stacked point cloud.

    Args:
        stacked_points: points of all batch elements stacked together
            (reshaped to ``[-1, 3]`` when rotated).
        batch_inds: for every point, the index of its batch element; the
            last entry plus one is used as the number of batch elements.
        config: object providing ``augment_rotation``, ``augment_scale_min``,
            ``augment_scale_max``, ``augment_scale_anisotropic``,
            ``augment_symmetries`` (indexable by axis 0..2) and
            ``augment_noise``.

    Returns:
        Tuple ``(stacked_points, scales, rotations)``.

    Raises:
        ValueError: if ``config.augment_rotation`` is neither ``'vertical'``
            nor ``'none'``.
    """
    num_batches = batch_inds[-1] + 1

    # ---- Rotation -------------------------------------------------------
    rotation_mode = config.augment_rotation
    if rotation_mode == 'none':
        rotations = tf.eye(3, batch_shape=(num_batches,))
    elif rotation_mode == 'vertical':
        # One random angle per batch element.
        angles = tf.random_uniform((num_batches,), minval=0, maxval=2*np.pi)
        cos_a, sin_a = tf.cos(angles), tf.sin(angles)
        zeros = tf.zeros_like(cos_a)
        ones = tf.ones_like(cos_a)

        # Row-major entries of a rotation around the third axis.
        flat_rotations = tf.stack(
            [cos_a, -sin_a, zeros,
             sin_a, cos_a, zeros,
             zeros, zeros, ones],
            axis=1)
        rotations = tf.reshape(flat_rotations, (-1, 3, 3))

        # One 3 x 3 matrix per point, then rotate every point.
        per_point_rotations = tf.gather(rotations, batch_inds)
        row_vectors = tf.expand_dims(stacked_points, axis=1)
        stacked_points = tf.reshape(
            tf.matmul(row_vectors, per_point_rotations), [-1, 3])
    else:
        raise ValueError('Unknown rotation augmentation : ' + config.augment_rotation)

    # ---- Scale ----------------------------------------------------------
    scale_columns = 3 if config.augment_scale_anisotropic else 1
    scales = tf.random_uniform((num_batches, scale_columns),
                               minval=config.augment_scale_min,
                               maxval=config.augment_scale_max)

    # Per-axis mirror sign: random +/-1 where enabled, +1 otherwise.
    mirror_signs = [
        tf.round(tf.random_uniform((num_batches, 1))) * 2 - 1
        if config.augment_symmetries[axis]
        else tf.ones([num_batches, 1], dtype=tf.float32)
        for axis in range(3)
    ]
    scales = scales * tf.concat(mirror_signs, 1)

    # Broadcast each element's scale to its points and apply it.
    per_point_scales = tf.gather(scales, batch_inds)
    stacked_points = stacked_points * per_point_scales

    # ---- Noise ----------------------------------------------------------
    jitter = tf.random_normal(tf.shape(stacked_points),
                              stddev=config.augment_noise)
    stacked_points = stacked_points + jitter

    return stacked_points, scales, rotations
def train_crbm2crbm(log_name, conv_size, input_size, chanl_input,
                    chanl_output, parameters):
    """Train a second convolutional RBM layer on top of a fixed first layer.

    Input images go through a fixed first layer (sigmoid convolution with
    the weights from ``parameters``, local response normalisation, max
    pooling). The pooled output is the visible data of the layer trained
    here. Parameters live in NumPy arrays and are fed through placeholders;
    each step computes momentum-based increments in the graph and adds them
    to the arrays on the Python side.

    Args:
        log_name: path of a text log file that is appended to every 10 steps.
        conv_size: spatial size of the (square) convolution kernel.
        input_size: spatial size of the visible data of this layer.
        chanl_input: number of visible channels.
        chanl_output: number of hidden channels.
        parameters: indexable; ``parameters[0]`` is fed as the first-layer
            weights and ``parameters[2]`` (reshaped to ``chanl_input``) as
            the first-layer hidden bias.

    Returns:
        Tuple ``(W, a, b)`` of NumPy arrays: the trained weights, visible
        bias and hidden bias.
    """
    images_input, labels_input = inputs(train='train',
                                        batch_size=FLAGS.batch_size,
                                        num_epochs=FLAGS.num_epochs)

    # Parameters and their momentum increments are fed at every step.
    W_conv1 = tf.placeholder(
        "float", [conv_size, conv_size, chanl_input, chanl_output])
    a_conv1 = tf.placeholder("float", [chanl_input])
    b_conv1 = tf.placeholder("float", [chanl_output])
    W_inc1 = tf.placeholder(
        "float", [conv_size, conv_size, chanl_input, chanl_output])
    a_inc1 = tf.placeholder("float", [chanl_input])
    b_inc1 = tf.placeholder("float", [chanl_output])

    # Fixed first layer.
    W_extra1 = tf.placeholder("float", [11, 11, 1, 96])
    b_extra1 = tf.placeholder("float", [96])
    images_placeholder = tf.placeholder(tf.float32,
                                        shape=(gd.BATCH_SIZE, 227 * 227))
    images_extra = tf.reshape(images_placeholder, [-1, 227, 227, 1])
    h_conv1 = 1. / (
        1 + tf.exp(-conv2d(images_extra, W_extra1, 4, 'VALID') - b_extra1))
    norm1 = tf.nn.lrn(h_conv1, 5, bias=1.0, alpha=0.0001, beta=0.75,
                      name='norm1')
    h_pool1 = max_pool(norm1, 3, 2, 'VALID')
    images = h_pool1
    print(images)

    # Positive phase.
    pos_conv1_prob = 1. / (
        1 + tf.exp(-conv2d(h_pool1, W_conv1, 1, 'VALID') - b_conv1))
    pos_conv1_trans = tf.expand_dims(tf.reduce_mean(pos_conv1_prob, 0), 2)
    images_mean = tf.reduce_mean(images, 0)
    images_trans = tf.expand_dims(
        tf.reshape(
            tf.transpose(
                tf.reshape(tf.reduce_mean(images, 0), [-1, chanl_input])),
            [chanl_input, input_size, input_size]), 3)
    pos_prods_origin = conv2d(images_trans, pos_conv1_trans, 1, 'VALID')
    pos_prods_trans = tf.transpose(pos_prods_origin, [1, 2, 0, 3])
    print('pos_prods_trans:' + str(pos_prods_trans))
    pos_hid_act = tf.reduce_mean(pos_conv1_prob, 0)
    pos_vis_act = tf.reduce_mean(images, 0)

    # NOTE(review): the sampled hidden states, their padded version and the
    # flipped weights below are built but not used by the reconstruction,
    # which works from the hidden probabilities instead.
    pos_hid_states = tf.to_float(
        tf.less_equal(tf.random_uniform(shape=tf.shape(pos_conv1_prob)),
                      pos_conv1_prob))
    pos_hid_states_addpad = tf.pad(pos_hid_states,
                                   [[0, 0], [conv_size - 1, conv_size - 1],
                                    [conv_size - 1, conv_size - 1], [0, 0]],
                                   "CONSTANT")
    W_transpose = tf.matrix_transpose(
        tf.reverse(W_conv1, [True, True, False, False]))
    print('pos_conv1_prob:' + str(pos_conv1_prob))

    # Negative phase: reconstruct the visible data, then the hidden probs.
    neg_data = 1. / (1 + tf.exp(-tf.nn.conv2d_transpose(
        pos_conv1_prob,
        W_conv1, [gd.BATCH_SIZE, input_size, input_size, chanl_input],
        strides=[1, 1, 1, 1],
        padding='VALID') - a_conv1))
    print('neg_data' + str(neg_data))
    neg_hid_probs = 1. / (
        1 + tf.exp(-conv2d(neg_data, W_conv1, 1, 'VALID') - b_conv1))
    neg_data_trans = tf.expand_dims(
        tf.reshape(
            tf.transpose(
                tf.reshape(tf.reduce_mean(neg_data, 0), [-1, chanl_input])),
            [chanl_input, input_size, input_size]), 3)
    neg_hid_probs_trans = tf.expand_dims(tf.reduce_mean(neg_hid_probs, 0), 2)
    neg_prods_origin = conv2d(neg_data_trans, neg_hid_probs_trans, 1, 'VALID')
    neg_prods_trans = tf.transpose(neg_prods_origin, [1, 2, 0, 3])
    print('neg_prods_trans' + str(neg_prods_trans))
    neg_hid_act = tf.reduce_mean(neg_hid_probs, 0)
    neg_vis_act = tf.reduce_mean(neg_data, 0)

    # Reconstruction error and momentum updates.
    err_sum = tf.reduce_sum(tf.square(images - neg_data))
    W_inc_update = gd.momentum * W_inc1 + gd.epsilonw * (
        (pos_prods_trans - neg_prods_trans) / gd.BATCH_SIZE -
        weightcost * W_conv1)
    a_inc_update = gd.momentum * a_inc1 + (
        gd.epsilona / gd.BATCH_SIZE) * tf.reduce_mean(pos_vis_act -
                                                      neg_vis_act)
    b_inc_update = gd.momentum * b_inc1 + (
        gd.epsilonb / gd.BATCH_SIZE) * tf.reduce_mean(
            tf.reduce_mean((pos_hid_act - neg_hid_act), 0), 0)

    init_op = tf.initialize_all_variables()
    tf.scalar_summary('loss', err_sum)
    tf.scalar_summary('a', a_conv1[0])
    tf.scalar_summary('b', b_conv1[0])
    tf.scalar_summary('W', W_conv1[0][0][0][0])
    summary_op = tf.merge_all_summaries()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # The context manager closes the session; no explicit close is needed.
    with tf.Session(config=config) as sess:
        sess.run(init_op)
        summary_writer = tf.train.SummaryWriter(FLAGS.tensorevents_dir,
                                                sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        # Current parameter values and momentum increments (NumPy side).
        W_update_0 = np.random.normal(
            0, 0.1, [conv_size, conv_size, chanl_input, chanl_output])
        a_update_0 = np.zeros([chanl_input], np.float32)
        b_update_0 = np.zeros([chanl_output], np.float32)
        W_inc_update_0 = np.zeros(
            [conv_size, conv_size, chanl_input, chanl_output], np.float32)
        a_inc_update_0 = np.zeros([chanl_input], np.float32)
        b_inc_update_0 = np.zeros([chanl_output], np.float32)
        W_extra1_0 = parameters[0]
        a_extra1_0 = parameters[1]
        b_extra1_0 = parameters[2].reshape(chanl_input)

        def _current_feed(image_batch):
            # Build the feed dict from the current NumPy parameter values.
            return {
                images_placeholder: image_batch,
                W_conv1: W_update_0,
                a_conv1: a_update_0,
                b_conv1: b_update_0,
                W_inc1: W_inc_update_0,
                a_inc1: a_inc_update_0,
                b_inc1: b_inc_update_0,
                W_extra1: W_extra1_0,
                b_extra1: b_extra1_0
            }

        try:
            step = 0
            while step < 10000:
                images_wtf = images_input.eval(session=sess)

                # Store the visible-bias increment in a_inc_update_0 (not in
                # a_update_0) so that the bias accumulates and the momentum
                # term sees the previous increment, as for W and b.
                (W_inc_update_0, a_inc_update_0, b_inc_update_0, loss,
                 neg_data_out, images_out) = sess.run(
                     [
                         W_inc_update, a_inc_update, b_inc_update, err_sum,
                         neg_data, images
                     ],
                     feed_dict=_current_feed(images_wtf))
                W_update_0 = W_update_0 + W_inc_update_0
                a_update_0 = a_update_0 + a_inc_update_0
                b_update_0 = b_update_0 + b_inc_update_0

                print("step %d: loss = %d" % (step, loss))

                if step % 10 == 0:
                    with open(log_name, 'a') as logfile:
                        logfile.write('step ' + str(step) + ": loss=" +
                                      str(loss) + '\n')
                        logfile.write("W:\n" + str(W_update_0[0]) + '\n')

                    summary_str = sess.run(
                        summary_op, feed_dict=_current_feed(images_wtf))
                    summary_writer.add_summary(summary_str, step)

                if step % 50 == 0:
                    # Checkpoint both layers and dump example images.
                    save_fn = FLAGS.log_dir + '/parameters_layer2_epoch_' + str(
                        step) + '.mat'
                    sio.savemat(
                        save_fn, {
                            'W1': W_extra1_0,
                            'b1': b_extra1_0,
                            'W2': W_update_0,
                            'b2': b_update_0
                        })
                    saveimg = Image.fromarray(
                        255 * to_image(images_out)[:, :, 0])
                    saveimg = saveimg.convert('RGB')
                    saveimg.save(FLAGS.pic_dir + 'imag_layer1_epoch' +
                                 str(step) + '.jpg')
                    saveimg_negv = Image.fromarray(
                        255 * to_image(neg_data_out)[:, :, 0])
                    saveimg_negv = saveimg_negv.convert('RGB')
                    saveimg_negv.save(FLAGS.pic_dir + 'negv_layer1_epoch' +
                                      str(step) + '.jpg')
                step += 1
        except tf.errors.OutOfRangeError:
            print('Done training for %d epochs, %d steps.' % (1001, step))
        finally:
            # Always stop and join the input queue threads.
            coord.request_stop()
            coord.join(threads)

    return W_update_0, a_update_0, b_update_0
def _get_exchanged_states(self, old_states, exchange_proposed,
                          exchange_proposed_n, sampled_replica_states,
                          sampled_replica_results):
  """Get list of TensorArrays holding exchanged states.

  For each of the first `exchange_proposed_n` proposed replica pairs, the
  states of the two replicas are swapped where the exchange is accepted, and
  the (possibly swapped) states are written into new TensorArrays.

  Args:
    old_states: list (one entry per state part) of objects supporting
      `.read(i)`, holding the state of replica `i` before the exchange.
    exchange_proposed: indexable by the loop counter; entry `i` unstacks into
      the pair `(m, n)` of replica indices proposed for exchange.
    exchange_proposed_n: number of proposed exchanges; the loop runs while the
      counter is below this value.
    sampled_replica_states: per-replica list of state parts; only replica 0 is
      inspected here, for the number of state parts and their shapes.
    sampled_replica_results: per-replica kernel results from which the
      'target_log_prob' field is read.

  Returns:
    List with one TensorArray of size `self.num_replica` per state part. Only
    the replica indices that appear in a processed proposal are written by
    this function.
  """
  with tf.name_scope('get_exchanged_states'):

    # Per-replica target log prob, each divided by that replica's inverse
    # temperature, stacked along a new leading replica axis.
    target_log_probs = []
    for replica in range(self.num_replica):
      replica_log_prob = _get_field(sampled_replica_results[replica],
                                    'target_log_prob')
      inverse_temp = self.inverse_temperatures[replica]
      target_log_probs.append(replica_log_prob / inverse_temp)
    target_log_probs = tf.stack(target_log_probs, axis=0)

    dtype = target_log_probs.dtype
    num_state_parts = len(sampled_replica_states[0])
    # exchanged_states[k][i] is Tensor of (new) state part k, for replica i.
    # The `k` will be known statically, and `i` is a Tensor.
    # We will insert values into indices `i` for every replica with a proposed
    # exchange.
    exchanged_states = [
        tf.TensorArray(
            dtype,
            size=self.num_replica,
            dynamic_size=False,
            tensor_array_name='exchanged_states',
            # State part k has same shape, regardless of replica.  So use 0.
            element_shape=sampled_replica_states[0][k].shape)
        for k in range(num_state_parts)
    ]

    # Draw random variables here, to avoid sampling in the loop (and losing
    # reproducibility).  This may mean we sample too many, but we will always
    # have enough.
    sample_shape = tf.concat(
        ([self.num_replica // 2], tf.shape(target_log_probs)[1:]), axis=0)
    log_uniforms = tf.log(
        tf.random_uniform(
            shape=sample_shape, dtype=dtype, seed=self._seed_stream()))

    def _swap(is_exchange_accepted, x, y):
      """Swap batches of x, y where accepted."""
      with tf.name_scope('swap_where_exchange_accepted'):
        new_x = mcmc_util.choose(is_exchange_accepted, y, x)
        new_y = mcmc_util.choose(is_exchange_accepted, x, y)
      return new_x, new_y

    def cond(i, unused_exchanged_states):
      # Continue until every proposed exchange has been processed.
      return i < exchange_proposed_n

    def body(i, exchanged_states):
      """Body of while loop for exchanging states."""
      # Propose exchange between replicas indexed by m and n.
      m, n = tf.unstack(exchange_proposed[i])

      # Construct log_accept_ratio:  -temp_diff * target_log_prob_diff.
      # Note target_log_prob_diff = -EnergyDiff (common definition is in terms
      # of energy).
      temp_diff = self.inverse_temperatures[m] - self.inverse_temperatures[n]
      # Difference of target log probs may be +- Inf or NaN.  We want the
      # product of this with the temperature difference to have "alt value" of
      # -Inf.
      log_accept_ratio = mcmc_util.safe_sum(
          [-temp_diff * target_log_probs[m], temp_diff * target_log_probs[n]])

      is_exchange_accepted = log_uniforms[i] < log_accept_ratio

      # Debug output: passes the tensor through unchanged while printing the
      # acceptance decision and the quantities it was derived from.
      is_exchange_accepted = tf.Print(
          is_exchange_accepted, [
              'is_exchange_accepted: ',
              is_exchange_accepted,
              'temp_diff: ',
              temp_diff,
              'log_accept_ratio: ',
              log_accept_ratio,
          ],
          summarize=2,
          first_n=0)

      # Write the (possibly swapped) states of both replicas, part by part.
      for k in range(num_state_parts):
        new_m, new_n = _swap(is_exchange_accepted, old_states[k].read(m),
                             old_states[k].read(n))
        exchanged_states[k] = exchanged_states[k].write(m, new_m)
        exchanged_states[k] = exchanged_states[k].write(n, new_n)

      return i + 1, exchanged_states

    # At this point, exchanged_states[k] is a length num_replicas TensorArray.
    return tf.while_loop(cond, body,
                         [tf.constant(0), exchanged_states])[1]  # Remove `i`
def random_uniform(*args, **kwargs):
    """Sample uniformly after resetting the global seed to 12345.

    Uses ``tf.random.set_seed`` / ``tf.random.uniform`` when the installed
    TensorFlow exposes them, and falls back to ``tf.set_random_seed`` /
    ``tf.random_uniform`` otherwise. All arguments are forwarded unchanged.
    """
    has_new_api = hasattr(tf, 'random') and hasattr(tf.random, 'set_seed')
    if not has_new_api:
        tf.set_random_seed(12345)
        return tf.random_uniform(*args, **kwargs)

    tf.random.set_seed(12345)
    return tf.random.uniform(*args, **kwargs)
output = LeakyReLU(output) output = tf.layers.dropout(output, rate=.2) output = lib.ops.linear.Linear('Discriminator.Output', 512, 1, output) return tf.reshape(output, [-1]) ''' losses ''' real_x_int = tf.placeholder(tf.int32, shape=[BATCH_SIZE, OUTPUT_DIM]) real_x = tf.reshape(2 * ((tf.cast(real_x_int, tf.float32) / 256.) - .5), [BATCH_SIZE, OUTPUT_DIM]) real_x += tf.random_uniform(shape=[BATCH_SIZE, OUTPUT_DIM], minval=0., maxval=1. / 128) # dequantize q_z = Extractor(real_x) q_k_logits, q_k = HyperExtractor(q_z) q_k_probs = tf.nn.softmax(q_k_logits) rec_x = Generator(q_z) hyper_p_z = tf.random_normal([BATCH_SIZE, DIM_LATENT]) hyper_p_k = tf.one_hot(indices=prior_k.sample(BATCH_SIZE), depth=N_COMS) p_z = HyperGenerator(hyper_p_k, hyper_p_z) fake_x = Generator(p_z) if MODE in ['local_ep', 'local_epce']: disc_fake, disc_real = [], [] disc_fake.append(HyperDiscriminator(p_z, hyper_p_k)) disc_real.append(HyperDiscriminator(q_z, q_k)) disc_fake.append(Discriminator(fake_x, p_z))
# Batch of input token ids; both dimensions are left unspecified.
encoder_inputs = tf.placeholder(tf.int32,
                                shape=(None, None),
                                name='encoder_inputs')

# Length of every sequence in the batch. Sequences are padded to a common
# length; to avoid padding, look into dynamic memory networks, which accept
# variable-length sequences.
encoder_inputs_length = tf.placeholder(tf.int32,
                                       shape=(None, ),
                                       name='encoder_inputs_length')

# Batch of target token ids for the decoder.
decoder_targets = tf.placeholder(tf.int32,
                                 shape=(None, None),
                                 name='decoder_targets')

# Randomly initialised embedding matrix, used to convert token ids into
# vectors (embeddings) of the right size for both encoder and decoder.
# Tensor shapes (number of dimensions) must line up in TensorFlow.
initial_embeddings = tf.random_uniform(
    [vocab_size, input_embedding_size], -1.0, 1.0)
embeddings = tf.Variable(initial_embeddings, dtype=tf.float32)

# This lookup could get huge in a real-world application.
encoder_inputs_embedded = tf.nn.embedding_lookup(embeddings, encoder_inputs)

from tensorflow.python.ops.rnn_cell import LSTMCell, LSTMStateTuple

encoder_cell = LSTMCell(encoder_hidden_units)

# Next step: get outputs and states.
# The bidirectional RNN function takes a separate cell argument for both the
# forward and the backward RNN, and returns separate outputs and states for
# each direction.
# A standard RNN only takes the "past" into account when making predictions.
import tensorflow as tf
import math

vocabulary_size = 10000
embedding_size = 128
examples = [3, 3, 3, 3, 10, 10, 10, 10]
labels = [2, 1, 3, 5, 3, 5, 6, 82]
batch_size = 8
num_samples = 8  # number of samples to draw

# --- Build the computation graph ---

# Embedding matrix: the word vectors to be learned by training.
# vocabulary_size is the dictionary size and embedding_size the vector
# dimension, so the matrix is vocabulary_size x embedding_size. It is
# initialised from a uniform distribution.
embedding_shape = [vocabulary_size, embedding_size]
embeddings = tf.Variable(tf.random_uniform(embedding_shape, -1.0, 1.0))

# Output weight matrix (truncated-normal initialisation) and bias vector
# (initialised to zero).
weight_stddev = 1.0 / math.sqrt(embedding_size)
weights = tf.Variable(
    tf.truncated_normal(embedding_shape, stddev=weight_stddev))
biases = tf.Variable(tf.zeros([vocabulary_size]))

# For one batch of inputs, look up the matching rows of the embedding matrix,
# and the weight rows and biases of the correct outputs. `examples` holds the
# input words and `labels` the correct outputs; both are 1-D lists whose
# elements are word ids in the dictionary.

# Embeddings for examples: [batch_size, embedding_size]
example_emb = tf.nn.embedding_lookup(embeddings, examples)
# Weights for labels: [batch_size, embedding_size]
true_w = tf.nn.embedding_lookup(weights, labels)
# Biases for labels: [batch_size, 1]
true_b = tf.nn.embedding_lookup(biases, labels)
def build_model(sess, graph, loss_model):
    """Build the word2vec training graph and initialise its variables.

    Args:
        sess: session used to run the variable initialiser. When ``None``,
            the current default session is used instead.
        graph: graph in which all ops and variables are created.
        loss_model: ``'cross_entropy'`` selects the cross-entropy loss; any
            other value selects the NCE loss with negative samples drawn
            from the unigram distribution.

    Returns:
        A ``Word2Vec`` object bundling placeholders, loss, optimizer,
        embeddings, similarity op and saver.
    """
    model = None
    with graph.as_default():
        # Ops and variables pinned to the CPU because of missing GPU implementation
        with tf.device('/cpu:0'):
            # Input data.
            train_inputs = tf.placeholder(tf.int32, shape=[batch_size])
            train_labels = tf.placeholder(tf.int32, shape=[batch_size, 1])
            valid_dataset = tf.constant(valid_examples, dtype=tf.int32)

            global_step = tf.Variable(0, trainable=False)

            # Look up embeddings for inputs.
            embeddings = tf.Variable(
                tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
            embed = tf.nn.embedding_lookup(embeddings, train_inputs)

            sm_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))

            # Get context embeddings from labels
            true_w = tf.nn.embedding_lookup(sm_weights, train_labels)
            true_w = tf.reshape(true_w, [-1, embedding_size])

            # Construct the variables for the NCE loss
            nce_weights = tf.Variable(
                tf.truncated_normal([vocabulary_size, embedding_size],
                                    stddev=1.0 / math.sqrt(embedding_size)))
            nce_biases = tf.Variable(tf.zeros([vocabulary_size]))

            if loss_model == 'cross_entropy':
                loss = tf.reduce_mean(tf_func.cross_entropy_loss(embed, true_w))
            else:
                # sample negative examples with unigram probability
                sample = np.random.choice(vocabulary_size, num_sampled,
                                          p=unigram_prob, replace=False)

                loss = tf.reduce_mean(
                    tf_func.nce_loss(embed, nce_weights, nce_biases,
                                     train_labels, sample, unigram_prob))

            # Construct the SGD optimizer using a learning rate of 1.0.
            optimizer = tf.train.GradientDescentOptimizer(1).minimize(
                loss, global_step=global_step)

            # Compute the cosine similarity between minibatch examples and all embeddings.
            norm = tf.sqrt(tf.reduce_sum(tf.square(embeddings), 1, keep_dims=True))
            normalized_embeddings = embeddings / norm
            valid_embeddings = tf.nn.embedding_lookup(normalized_embeddings,
                                                      valid_dataset)
            similarity = tf.matmul(valid_embeddings, normalized_embeddings,
                                   transpose_b=True)

            saver = tf.train.Saver(tf.global_variables())

            # Summaries are currently disabled.
            summary = None
            summary_writer = None

            # Run the initialiser on the session that was passed in instead
            # of relying on an ambient default session; session=None still
            # falls back to the default session.
            tf.global_variables_initializer().run(session=sess)
            print("Initialized")

        model = Word2Vec(train_inputs, train_labels, loss, optimizer,
                         global_step, embeddings, normalized_embeddings,
                         valid_embeddings, similarity, saver, summary,
                         summary_writer)

        return model
def autoencoder(input_shape=[None, 784],
                n_filters=[1, 10, 10, 10],
                filter_sizes=[3, 3, 3, 3],
                corruption=False):
    """Build a convolutional (optionally denoising) autoencoder w/ tied weights.

    Parameters
    ----------
    input_shape : list, optional
        Shape of the input placeholder. A 2-d input must have a square number
        of features and is reshaped to an image; a 4-d input is used as is.
    n_filters : list, optional
        ``n_filters[0]`` is the number of input channels used when reshaping
        a 2-d input; the remaining entries are the output channels of each
        encoder layer.
    filter_sizes : list, optional
        Kernel size of each encoder layer (indexed by layer).
    corruption : bool, optional
        When true, the input is passed through ``corrupt`` before encoding.

    Returns
    -------
    dict
        ``'x'``: input placeholder, ``'z'``: inner-most latent
        representation, ``'y'``: reconstruction, ``'cost'``: sum of squared
        differences between the reconstruction and the image tensor.

    Raises
    ------
    ValueError
        If the input is neither 2-d with a square feature count nor 4-d.
    """
    # Input to the network, optionally corrupted for denoising.
    x = tf.placeholder(tf.float32, input_shape, name='x')
    x_noise = corrupt(x) if corruption else x

    # Ensure a 2-d input is converted to a square image tensor.
    if len(x.get_shape()) == 2:
        side = np.sqrt(x_noise.get_shape().as_list()[1])
        if side != int(side):
            raise ValueError('Unsupported input dimensions')
        side = int(side)
        x_tensor = tf.reshape(x_noise, [-1, side, side, n_filters[0]])
    elif len(x_noise.get_shape()) == 4:
        x_tensor = x_noise
    else:
        raise ValueError('Unsupported input dimensions')
    current_input = x_tensor

    # Encoder: strided convolutions; remember weights and input shapes so
    # the decoder can mirror them.
    encoder = []
    shapes = []
    for layer_i, n_output in enumerate(n_filters[1:]):
        in_shape = current_input.get_shape().as_list()
        n_input = in_shape[3]
        shapes.append(in_shape)

        ksize = filter_sizes[layer_i]
        bound = 1.0 / math.sqrt(n_input)
        W = tf.Variable(
            tf.random_uniform([ksize, ksize, n_input, n_output],
                              -bound, bound))
        b = tf.Variable(tf.zeros([n_output]))
        encoder.append(W)

        conv = tf.nn.conv2d(current_input, W,
                            strides=[1, 2, 2, 1], padding='SAME')
        current_input = lrelu(tf.add(conv, b))

    # Latent representation.
    z = current_input

    # Decoder: reuse the encoder weights in reverse order.
    for W, shape in zip(encoder[::-1], shapes[::-1]):
        b = tf.Variable(tf.zeros([W.get_shape().as_list()[2]]))
        out_shape = tf.pack([tf.shape(x)[0], shape[1], shape[2], shape[3]])
        deconv = tf.nn.deconv2d(current_input, W, out_shape,
                                strides=[1, 2, 2, 1], padding='SAME')
        current_input = lrelu(tf.add(deconv, b))

    # Reconstruction and pixel-wise squared-error cost.
    y = current_input
    cost = tf.reduce_sum(tf.square(y - x_tensor))

    return {'x': x, 'z': z, 'y': y, 'cost': cost}
def _sample(logits: tf.Tensor):
    """Sample an index along the last axis from unnormalised logits.

    Uniform noise is converted to Gumbel noise and added to the logits; the
    arg-max over the last axis is returned as an op named "action".
    """
    noise = tf.random_uniform(tf.shape(logits))
    neg_log_noise = -tf.log(noise)
    perturbed_logits = logits - tf.log(neg_log_noise)
    return tf.argmax(perturbed_logits, axis=-1, name="action")
def xavier_init(fan_in, fan_out, constant=1):
    """Return a (fan_in, fan_out) tensor with Xavier/Glorot uniform values.

    Values are drawn uniformly from ``[-limit, limit)`` with
    ``limit = constant * sqrt(6 / (fan_in + fan_out))``.
    """
    limit = constant * np.sqrt(6.0 / (fan_in + fan_out))
    return tf.random_uniform((fan_in, fan_out),
                             minval=-limit,
                             maxval=limit,
                             dtype=tf.float32)
def __init__(
        self, sequence_length, num_classes, vocab_size,
        embedding_size, num_hidden, batch_size, init_state, cell_type):
    """Build the graph of an RNN text classifier.

    Args:
        sequence_length: number of tokens per input sequence.
        num_classes: number of output classes.
        vocab_size: number of rows of the embedding matrix.
        embedding_size: dimension of each word embedding.
        num_hidden: number of units of the recurrent cell.
        batch_size: batch size used to build the zero initial state.
        init_state: when exactly ``True``, the RNN starts from an explicit
            zero state stored in ``self.initial_state``.
        cell_type: ``'lstm'`` selects ``LSTMCell`` and ``'gru'`` selects
            ``GRUCell``; every other value (including ``'vanlia'``) selects
            ``BasicLSTMCell``.
    """
    # Placeholders for input, output and dropout
    self.input_x = tf.placeholder(
        tf.int32, [None, sequence_length], name="input_x")
    self.input_y = tf.placeholder(
        tf.int32, [None, num_classes], name="input_y")
    self.dropout_keep_prob = tf.placeholder(
        tf.float32, name="dropout_keep_prob")

    # Embedding layer
    with tf.device('/cpu:0'), tf.name_scope("embedding"):
        embedding_init = tf.random_uniform(
            [vocab_size, embedding_size], -1.0, 1.0)
        W = tf.Variable(embedding_init, name="W")
        # [batch, n_timesteps, n_inputs]
        self.embedded_words = tf.nn.embedding_lookup(W, self.input_x)

    # Recurrent cell
    with tf.device('/cpu:0'), tf.name_scope("rnn"):
        if cell_type == 'lstm':
            self.rnn_cell = tf.nn.rnn_cell.LSTMCell(num_hidden)
        elif cell_type == 'gru':
            self.rnn_cell = tf.nn.rnn_cell.GRUCell(num_hidden)
        else:
            # 'vanlia' and any unrecognised type use a basic LSTM cell.
            self.rnn_cell = tf.contrib.rnn.BasicLSTMCell(num_hidden)

    # Unroll the cell; 'outputs' is [batch_size, max_time, cell_state_size].
    with tf.name_scope("rnn"):
        rnn_kwargs = {'dtype': tf.float32}
        if init_state is True:
            # Start from an explicit zero state.
            self.initial_state = self.rnn_cell.zero_state(
                batch_size, dtype=tf.float32)
            rnn_kwargs['initial_state'] = self.initial_state
        self.outputs, _final_state = tf.nn.dynamic_rnn(
            self.rnn_cell, self.embedded_words, **rnn_kwargs)

    # Final (unnormalized) scores and predictions
    with tf.name_scope("output"):
        W = tf.get_variable(
            "W",
            shape=[num_hidden, num_classes],
            initializer=tf.contrib.layers.xavier_initializer())
        b = tf.Variable(tf.constant(0.1, shape=[num_classes]), name="b")

        # Time-major view [n_timesteps, batch, n_inputs]; the last time step
        # feeds the classifier.
        self.transpose_outputs = tf.transpose(self.outputs, perm=[1, 0, 2])
        last_output = self.transpose_outputs[-1]
        self.scores = tf.nn.xw_plus_b(last_output, W, b, name="scores")
        self.predictions = tf.argmax(self.scores, 1, name="predictions")

    # Calculate mean cross-entropy loss
    with tf.name_scope("loss"):
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=self.scores, labels=self.input_y)
        self.loss = tf.reduce_mean(losses)

    # Accuracy
    with tf.name_scope("accuracy"):
        true_classes = tf.argmax(self.input_y, 1)
        correct_predictions = tf.equal(self.predictions, true_classes)
        self.accuracy = tf.reduce_mean(
            tf.cast(correct_predictions, "float"), name="accuracy")
def resnet_v2_200(inputs,
                  num_classes=None,
                  global_pool=True,
                  reuse=None,
                  scope='resnet_v2_200'):
    """Build a ResNet v2 network with the 200-layer block configuration.

    Each unit is a ``(depth, bottleneck_depth, stride)`` tuple; the first
    three blocks end with a stride-2 unit, the last block has none.
    """
    block1_units = [(256, 64, 1)] * 2 + [(256, 64, 2)]
    block2_units = [(512, 128, 1)] * 23 + [(512, 128, 2)]
    block3_units = [(1024, 256, 1)] * 35 + [(1024, 256, 2)]
    block4_units = [(2048, 512, 1)] * 3

    blocks = [
        Block('block1', bottleneck, block1_units),
        Block('block2', bottleneck, block2_units),
        Block('block3', bottleneck, block3_units),
        Block('block4', bottleneck, block4_units),
    ]
    return resnet_v2(inputs,
                     blocks,
                     num_classes,
                     global_pool,
                     include_root_block=True,
                     reuse=reuse,
                     scope=scope)


# Forward-pass benchmark on random input images.
batch_size = 32
height, width = 224, 224
inputs = tf.random_uniform((batch_size, height, width, 3))

# NOTE(review): the benchmark builds resnet_v2_101, not the resnet_v2_200
# defined above -- presumably defined elsewhere in the file; confirm this is
# the intended network.
with slim.arg_scope(resnet_arg_scope(is_training=False)):
    nets, end_points = resnet_v2_101(inputs, 1000)

init = tf.global_variables_initializer()
sess = tf.Session()
sess.run(init)
num_batches = 100
time_tensorflow(sess, nets, 'Forword')