def _update_lipschitz(self,v,i): config = self.config if len(v.shape) > 1: k = self.config.weight_constraint_k or 100.0000 wi_hat = v if len(v.shape) == 4: #fij = tf.reduce_sum(tf.abs(wi_hat), axis=[0,1]) fij = wi_hat fij = tf.reduce_sum(tf.abs(fij), axis=[1]) fij = tf.reduce_max(fij, axis=[0]) else: fij = wi_hat if self.config.ortho_pnorm == "inf": wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=0), axis=0) else: # conv wp = tf.reduce_max(tf.reduce_sum(tf.abs(fij), axis=1), axis=0) ratio = (1.0/tf.maximum(1.0, wp/k)) if self.config.weight_bounce: bounce = tf.minimum(1.0, tf.ceil(wp/k-0.999)) ratio -= tf.maximum(0.0, bounce) * 0.2 if self.config.weight_scaleup: up = tf.minimum(1.0, tf.ceil(0.02-wp/k)) ratio += tf.maximum(0.0, up) * k/wp * 0.2 wi = ratio*(wi_hat) #self.gan.metrics['wi'+str(i)]=wp #self.gan.metrics['wk'+str(i)]=ratio #self.gan.metrics['bouce'+str(i)]=bounce return tf.assign(v, wi) return None
def dot_product_attention(self,x_sen,y_sen,x_len,y_len):
    '''
    Compute masked dot-product attention between two sequences, in both
    directions.

    Shape notes are carried over from the original inline annotations
    (b = batch, h = hidden units); they are not checked here.

    :param x_sen: 3D tensor, annotated as (b,x_len,h)
    :param y_sen: 3D tensor, annotated as (b,y_len,h)
    :param x_len: not used by this method
    :param y_len: not used by this method
    :return: (1) weighted_y: attention-weighted sum of y_sen for every x
                 position, annotated as (b,x_len,2*h)
             (2) weighted_x: attention-weighted sum of x_sen for every y
                 position, annotated as (b,y_len,2*h)
    '''
    # (b,x_len,h) x (b,h,y_len) -> (b,x_len,y_len)
    scores = tf.matmul(x_sen, tf.transpose(y_sen, perm=[0, 2, 1]))

    # Subtract the running maximum before exponentiating for numerical
    # stability of the hand-written softmax.
    max_over_y = tf.reduce_max(scores, axis=2, keep_dims=True)
    exp_over_y = tf.exp(scores - max_over_y)  # (b,x_len,y_len)
    max_over_x = tf.reduce_max(scores, axis=1, keep_dims=True)
    exp_over_x = tf.exp(tf.transpose((scores - max_over_x), perm=[0, 2, 1]))  # (b,y_len,x_len)

    # Zero out masked positions via self.y_mask / self.x_mask.
    exp_over_y = exp_over_y * self.y_mask[:, None, :]
    exp_over_x = exp_over_x * self.x_mask[:, None, :]

    # Normalise; the epsilon guards against an all-masked row.
    alpha = exp_over_y / (tf.reduce_sum(exp_over_y, 2, keep_dims=True) + 1e-8)
    beta = exp_over_x / (tf.reduce_sum(exp_over_x, 2, keep_dims=True) + 1e-8)

    # Broadcast-multiply values by weights and sum over the attended axis.
    weighted_y = tf.reduce_sum(tf.expand_dims(y_sen, 1) * tf.expand_dims(alpha, -1), 2)
    weighted_x = tf.reduce_sum(tf.expand_dims(x_sen, 1) * tf.expand_dims(beta, -1), 2)
    return weighted_y, weighted_x
def _tf_loss(self, sim, sim_emb):
    """Build the scalar training loss.

    Column 0 of ``sim`` is treated as the positive similarity and the
    remaining columns as negatives. A hinge-style margin loss is combined
    with a penalty on ``sim_emb`` (scaled by ``self.C_emb``) and the graph's
    collected regularization loss.
    """
    if self.use_max_sim_neg:
        # Only the highest-scoring negative contributes.
        hardest_negative = tf.reduce_max(sim[:, 1:], -1)
        positive_term = tf.maximum(0., self.mu_pos - sim[:, 0])
        negative_term = tf.maximum(0., self.mu_neg + hardest_negative)
        loss = tf.reduce_mean(positive_term + negative_term)
    else:
        # Per-column margins: mu_pos for column 0, mu_neg for the rest.
        margins = self.mu_neg * np.ones(self.num_neg + 1)
        margins[0] = self.mu_pos

        # Sign flips the positive column so every column becomes a hinge.
        negative_count = tf.shape(sim)[1] - 1
        signs = tf.concat([-1 * tf.ones([1, 1]),
                           tf.ones([1, negative_count])], 1)
        hinge = tf.maximum(0., margins + signs * sim)
        loss = tf.reduce_mean(tf.reduce_sum(hinge, -1))

    # Penalize the largest (non-negative) similarity found in sim_emb.
    embedding_penalty = tf.maximum(0., tf.reduce_max(sim_emb, -1))
    return (loss +
            tf.reduce_mean(embedding_penalty) * self.C_emb +
            tf.losses.get_regularization_loss())
def _create_aggregate_input(self, v1, v2):
    """Build the input of the aggregate step from two sequence tensors.

    Each tensor is masked with ``utils.text.mask3d`` and then reduced over
    axis 1 twice (sum and max); the four reductions are concatenated.

    Parameters
    ----------
    v1: tf.Tensor
        Tensor with shape (batch, time_steps, num_units).
    v2: tf.Tensor
        Tensor with shape (batch, time_steps, num_units)

    Returns
    -------
    input_tensor: tf.Tensor
        A tensor with shape (batch, num_aggregate_inputs), laid out as
        [sum(v1), sum(v2), max(v1), max(v2)] along axis 1.
    """
    masked = (
        utils.text.mask3d(v1, self._m_sentence1_size, 0, 1),
        utils.text.mask3d(v2, self._m_sentence2_size, 0, 1),
    )
    # Reduce over the time axis; each result is (batch, num_units).
    sums = [tf.reduce_sum(sentence, 1) for sentence in masked]
    maxima = [tf.reduce_max(sentence, 1) for sentence in masked]
    return tf.concat(axis=1, values=sums + maxima)
def gen_debug_td_error_summaries(
    target_q_values, q_values, td_targets, td_errors):
    """Emit histogram and scalar summaries for a batch of critic values.

    Args:
      target_q_values: set of predicted next stage values.
      q_values: current predicted value for the critic network.
      td_targets: discounted target_q_values with added next stage reward.
      td_errors: the difference between td_targets and q_values.
    """
    # (summary tag / scope name, tensor, also log mean |tensor|?)
    tracked = (
        ('td_targets', td_targets, False),
        ('q_values', q_values, False),
        ('target_q_values', target_q_values, False),
        ('td_errors', td_errors, True),
    )
    with tf.name_scope('td_errors'):
        for tag, tensor, _ in tracked:
            tf.summary.histogram(tag, tensor)
        for scope_name, tensor, with_mean_abs in tracked:
            with tf.name_scope(scope_name):
                tf.summary.scalar('mean', tf.reduce_mean(tensor))
                tf.summary.scalar('max', tf.reduce_max(tensor))
                tf.summary.scalar('min', tf.reduce_min(tensor))
                if with_mean_abs:
                    tf.summary.scalar('mean_abs', tf.reduce_mean(tf.abs(tensor)))
def convolution(self, inputs, num_units):
    """Apply n-gram convolutions with max-over-time pooling.

    Three filter banks (heights 2, 3 and 5, width 50) are always created so
    the set of variables does not depend on ``conv_type``; the one(s)
    selected by ``self.hyperparams['conv_type']`` are returned.

    Fixes relative to the previous version:
      * the quin-gram branch convolved with the *trigram* weights and bias,
        leaving ``w_quingram`` / ``b_quingram`` unused, and declared a
        height-3 kernel; it now uses its own height-5 kernel. Checkpoints
        holding the old height-3 ``w_quingram`` will not restore into it.
      * only the collapsed width axis is squeezed, so a batch of size 1 (or
        ``num_units == 1``) no longer loses a dimension.
      * an unknown ``conv_type`` raises ``ValueError`` instead of failing
        with ``UnboundLocalError`` at the return statement.

    Args:
        inputs: tensor that becomes the conv input after a trailing channel
            axis is added; assumed (batch, time, 50) - TODO confirm.
        num_units: number of output channels per filter bank.

    Returns:
        The pooled features of the selected bank, or the three banks
        concatenated along axis 1 for ``'inception'``.

    Raises:
        ValueError: if ``conv_type`` is not one of the supported values.
    """
    x = tf.expand_dims(inputs, 3)
    chan_in = 1

    def ngram_features(tag, height):
        # One filter bank spanning `height` rows, then max over axis 1.
        weights = tf.get_variable(
            "w_" + tag, shape=[height, 50, chan_in, num_units],
            initializer=tf.contrib.layers.xavier_initializer_conv2d())
        bias = tf.get_variable("b_" + tag, shape=[num_units])
        activations = self.nonlin(
            tf.nn.conv2d(x, weights, strides=[1, 1, 1, 1], padding='VALID') + bias)
        # A VALID conv with a full-width kernel leaves axis 2 with size 1.
        return tf.reduce_max(tf.squeeze(activations, [2]), 1)

    h_bigram = ngram_features("bigram", 2)
    h_trigram = ngram_features("trigram", 3)
    h_quingram = ngram_features("quingram", 5)

    conv_type = self.hyperparams['conv_type']
    if conv_type == 'bigram':
        return h_bigram
    if conv_type == 'trigram':
        return h_trigram
    if conv_type == 'quingram':
        return h_quingram
    if conv_type == 'inception':
        # NOTE(review): (axis, values) is the pre-1.0 tf.concat argument
        # order, kept as in the original - confirm the TensorFlow version.
        return tf.concat(1, [h_bigram, h_trigram, h_quingram])
    raise ValueError(
        "Unknown conv_type %r; expected 'bigram', 'trigram', 'quingram' "
        "or 'inception'" % (conv_type,))
def check_convergence(self, new_T0, new_transition, new_emission):
    """Return a boolean tensor that is true once all three updates are small.

    Each new estimate is compared with the stored one (``self.T0``,
    ``self.T``, ``self.E``); the largest absolute element-wise change must
    be strictly below ``self.epsilon`` for all three.
    """
    def has_settled(current, proposed):
        return tf.reduce_max(tf.abs(current - proposed)) < self.epsilon

    initial_ok = has_settled(self.T0, new_T0)
    transition_ok = has_settled(self.T, new_transition)
    emission_ok = has_settled(self.E, new_emission)
    return tf.logical_and(tf.logical_and(initial_ok, transition_ok), emission_ok)
def testArgRenames(self):
    """Check reductions accept ``reduction_indices`` and ``expand_dims`` accepts ``dim``."""
    with self.test_session():
        a = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
        b = [[True, False, False], [False, True, True]]
        dim0 = [1]
        dim1 = [1]

        # Boolean reductions.
        self.assertAllEqual(
            tf.reduce_any(b, reduction_indices=dim0).eval(), [True, True])
        self.assertAllEqual(
            tf.reduce_all(b, reduction_indices=[0]).eval(),
            [False, False, False])
        self.assertAllEqual(
            tf.reduce_all(b, reduction_indices=dim1).eval(), [False, False])

        # (reduction, expected per-row result, expected full reduction)
        exact_cases = (
            (tf.reduce_sum, [6.0, 15.0], 21.0),
            (tf.reduce_prod, [6.0, 120.0], 720.0),
            (tf.reduce_mean, [2.0, 5.0], 3.5),
            (tf.reduce_min, [1.0, 4.0], 1.0),
            (tf.reduce_max, [3.0, 6.0], 6.0),
        )
        for reduce_fn, per_row, overall in exact_cases:
            self.assertAllEqual(
                reduce_fn(a, reduction_indices=[1]).eval(), per_row)
            self.assertAllEqual(
                reduce_fn(a, reduction_indices=[0, 1]).eval(), overall)
            # Same axes passed positionally.
            self.assertAllEqual(reduce_fn(a, [0, 1]).eval(), overall)

        # logsumexp is only compared approximately.
        self.assertAllClose(
            tf.reduce_logsumexp(a, reduction_indices=[1]).eval(),
            [3.40760589, 6.40760612])
        self.assertAllClose(
            tf.reduce_logsumexp(a, reduction_indices=[0, 1]).eval(),
            6.45619344711)
        self.assertAllClose(
            tf.reduce_logsumexp(a, [0, 1]).eval(), 6.45619344711)

        self.assertAllEqual(
            tf.expand_dims([[1, 2], [3, 4]], dim=1).eval(),
            [[[1, 2]], [[3, 4]]])
def to_binary_tf(bar_or_track_bar, threshold=0.0, track_mode=False, melody=False):
    """Return the binarized (boolean) version of the input tensor.

    Be careful of the channel order: outside ``track_mode`` the first entry
    of the last axis is treated as the melody track and the rest as other
    tracks.

    A melody entry is on only where it is both the maximum along axis 2 and
    above ``threshold``; non-melody entries only need to exceed
    ``threshold``.

    Args:
        bar_or_track_bar: tensor to binarize. Outside ``track_mode`` its
            rank must be 4 or 5.
        threshold: values strictly greater than this count as "on".
        track_mode: when true the whole tensor is a single track.
        melody: in ``track_mode``, whether that track is the melody track.

    Returns:
        A boolean tensor with the same shape as the input.

    Raises:
        ValueError: outside ``track_mode`` when the rank is not 4 or 5
            (this previously surfaced as an ``UnboundLocalError``).
    """
    if track_mode:
        if melody:
            is_max = tf.equal(
                bar_or_track_bar,
                tf.reduce_max(bar_or_track_bar, axis=2, keep_dims=True))
            above = (bar_or_track_bar > threshold)
            return tf.logical_and(is_max, above)
        return (bar_or_track_bar > threshold)

    rank = len(bar_or_track_bar.get_shape())
    if rank not in (4, 5):
        raise ValueError(
            "to_binary_tf expects a rank-4 or rank-5 tensor when "
            "track_mode is False, got rank %d" % rank)

    # Split the last axis into [melody | everything else].
    last = rank - 1
    melody_track = tf.slice(bar_or_track_bar, [0] * rank, [-1] * last + [1])
    other_tracks = tf.slice(bar_or_track_bar, [0] * last + [1], [-1] * rank)

    melody_is_max = tf.equal(
        melody_track, tf.reduce_max(melody_track, axis=2, keep_dims=True))
    melody_above = (melody_track > threshold)
    out_tensor_melody = tf.logical_and(melody_is_max, melody_above)

    out_tensor_others = (other_tracks > threshold)

    return tf.concat([out_tensor_melody, out_tensor_others], last)
def attentive_pooling_weights(U_AP, raw_question_rep, raw_answer_rep, tokens_question, tokens_answer,
                              apply_softmax=True):
    """Calculates the attentive pooling weights for question and answer

    The alignment matrix returned by ``soft_alignment`` is max-pooled over
    axis 2 for the question and over axis 1 for the answer.

    :param U_AP: the soft-attention similarity matrix (to learn)
    :param raw_question_rep: question representation passed to ``soft_alignment``
    :param raw_answer_rep: answer representation passed to ``soft_alignment``
    :param tokens_question: The raw token indices of the question. Used to detect not-set tokens
    :param tokens_answer: The raw token indices of the answer. Used to detect not-set tokens
    :param apply_softmax: when true, the pooled scores go through ``attention_softmax``;
        otherwise they are returned as they are
    :return: question weights, answer weights
    """
    question_as_float = tf.to_float(tokens_question)
    answer_as_float = tf.to_float(tokens_answer)
    question_set = non_zero_tokens(question_as_float)
    answer_set = non_zero_tokens(answer_as_float)

    alignment = soft_alignment(U_AP, raw_question_rep, raw_answer_rep, question_set, answer_set)

    pooled_question = tf.reduce_max(alignment, [2], keep_dims=False)
    pooled_answer = tf.reduce_max(alignment, [1], keep_dims=False)

    if not apply_softmax:
        return pooled_question, pooled_answer

    attention_Q = attention_softmax(pooled_question, question_set)
    attention_A = attention_softmax(pooled_answer, answer_set)
    return attention_Q, attention_A
def interface(self):
    """Build two stacked conv layers and expose the last output as ``self.y``.

    ``self.W`` / ``self.b`` collect the layer variables and ``self.logits``
    the layer outputs, starting with the input ``self.x``. When
    ``self.activate == 'Maxout'`` each layer produces ``self.n`` pieces per
    output channel and keeps their maximum; otherwise ReLU is applied.
    """
    self.W, self.b, self.logits = [], [], [self.x]

    def conv_layer(w_shape, b_shape):
        # Create the variables and convolve the most recent output with them.
        self.W.append(weight_variable(w_shape))
        self.b.append(bias_variable(b_shape))
        return tf.nn.conv2d(self.logits[-1], self.W[-1],
                            strides = self.strides, padding = self.padding) + self.b[-1]

    def maxout(pre_activation, channels):
        # Split the channel axis into (channels, n) and keep the max piece.
        dims = pre_activation.get_shape().dims
        pieces = tf.reshape(
            pre_activation,
            [-1, int(dims[1]), int(dims[2]), channels, self.n])
        return tf.reduce_max(pieces, 4)

    with tf.name_scope('main_layer') as scope:
        k_h, k_w = self.filter[0], self.filter[1]
        if self.activate == 'Maxout':
            first = conv_layer(
                [k_h, k_w, self.filter[2], self.filter[3] * self.n],
                [self.hidden_nodes * self.n])
            self.logits.append(maxout(first, self.hidden_nodes))

            second = conv_layer(
                [k_h, k_w, self.hidden_nodes, self.filter[2] * self.n],
                [self.filter[2] * self.n])
            self.logits.append(maxout(second, self.filter[2]))
        else:
            first = conv_layer(self.filter, [self.hidden_nodes])
            self.logits.append(tf.nn.relu(first))

            second = conv_layer(
                [k_h, k_w, self.hidden_nodes, self.filter[2]],
                [self.filter[2]])
            self.logits.append(tf.nn.relu(second))

        self.y = self.logits[len(self.logits) - 1]
def call(self, x, mask=None):
    """Pick the best (row, column) pair from the outer product of two inputs.

    ``x`` is a pair of tensors; their batched outer product is restricted by
    ``tf.matrix_band_part(..., 0, self.ans_limit)`` so only entries with
    ``0 <= column - row <= self.ans_limit`` survive. ``mask`` is unused.

    Presumably the inputs are per-position start/end scores - confirm
    against the caller.

    Returns:
        ``[row_index, column_index]``, each a float32 tensor reshaped to
        (-1, 1).
    """
    first, second = x
    joint = tf.matmul(tf.expand_dims(first, axis=2),
                      tf.expand_dims(second, axis=1))
    joint = tf.matrix_band_part(joint, 0, self.ans_limit)

    def best_position(scores):
        winner = tf.argmax(scores, axis=1)
        return tf.reshape(tf.cast(winner, tf.float32), (-1, 1))

    row_index = best_position(tf.reduce_max(joint, axis=2))
    column_index = best_position(tf.reduce_max(joint, axis=1))
    return [row_index, column_index]
def prepare_decoder_components(self):
    """Create the decoder cell, embedding, projection layer and max lengths.

    Sets ``decoder_cell`` (``n_layers`` stacked cells from
    ``self.lstm_cell()``), ``decoder_embedding``, ``projection_layer`` and
    the batch-wise maxima ``X_seq_max_len`` / ``Y_seq_max_len``.
    """
    stacked_cells = [self.lstm_cell() for _ in range(self.n_layers)]
    self.decoder_cell = tf.nn.rnn_cell.MultiRNNCell(stacked_cells)

    target_vocab_size = len(self.Y_word2idx)
    # Embedding table initialised uniformly in [-1, 1].
    self.decoder_embedding = tf.get_variable(
        'decoder_embedding',
        [target_vocab_size, self.decoder_embedding_dim],
        tf.float32,
        tf.random_uniform_initializer(-1.0, 1.0))
    # Maps decoder outputs to one score per target-vocabulary entry.
    self.projection_layer = Dense(target_vocab_size)

    self.X_seq_max_len = tf.reduce_max(self.X_seq_len)
    self.Y_seq_max_len = tf.reduce_max(self.Y_seq_len)
def kl(self, other):
    """Return KL(self || other) for two categorical distributions given by logits.

    The reduction runs over the last axis. Logits are shifted by their
    maximum before exponentiation for numerical stability.
    """
    def shifted(logits):
        return logits - tf.reduce_max(logits, axis=-1, keep_dims=True)

    def partition(exponentials):
        return tf.reduce_sum(exponentials, axis=-1, keep_dims=True)

    a0 = shifted(self.logits)
    a1 = shifted(other.logits)
    ea0 = tf.exp(a0)
    ea1 = tf.exp(a1)
    z0 = partition(ea0)
    z1 = partition(ea1)

    # p0 is softmax(self.logits); the bracket is log p0 - log p1.
    p0 = ea0 / z0
    log_ratio = a0 - tf.log(z0) - a1 + tf.log(z1)
    return tf.reduce_sum(p0 * log_ratio, axis=-1)
def coverage_box(bboxes):
    """Return the single box that encloses every box in ``bboxes``.

    ``bboxes`` is split into four columns along axis 1, read as
    (y_min, x_min, y_max, x_max). The minimum of the two min-columns and the
    maximum of the two max-columns are taken over axis 0 and stacked along
    axis 1.
    """
    y_min, x_min, y_max, x_max = tf.split(
        value=bboxes, num_or_size_splits=4, axis=1)
    enclosing = [
        tf.reduce_min(y_min, axis=0),
        tf.reduce_min(x_min, axis=0),
        tf.reduce_max(y_max, axis=0),
        tf.reduce_max(x_max, axis=0),
    ]
    return tf.stack(enclosing, axis=1)
def forward(self, inputs):
    """Max-reduce ``inputs`` over three axes chosen by ``self.data_format``.

    Axes [1, 2, 3] are reduced for ``'channels_last'`` and [2, 3, 4] for
    ``'channels_first'``; the op is named ``self.name``.

    Raises:
        ValueError: if ``self.data_format`` is neither of the two values.
    """
    layout = self.data_format
    if layout == 'channels_last':
        pooled_axes = [1, 2, 3]
    elif layout == 'channels_first':
        pooled_axes = [2, 3, 4]
    else:
        raise ValueError(
            "`data_format` should have one of the following values: [`channels_last`, `channels_first`]"
        )
    return tf.reduce_max(input_tensor=inputs, axis=pooled_axes, name=self.name)
def VI_Block(X, S1, S2, config):
    """Build the value-iteration block and return ``(logits, output)``.

    Args:
        X: input image tensor fed to the first convolution (the transpose
            comment below implies NHWC layout).
        S1, S2: state coordinates; flattened and cast to int32 to index the
            first and second spatial axes of the Q tensor.
        config: object providing ``k``, ``ch_i``, ``ch_h``, ``ch_q`` and
            ``statebatchsize``.

    Returns:
        logits: Q values at the requested states multiplied by ``w_o``
            (8 output columns).
        output: softmax of ``logits``.
    """
    k = config.k  # Number of value iterations performed
    ch_i = config.ch_i  # Channels in input layer
    ch_h = config.ch_h  # Channels in initial hidden layer
    ch_q = config.ch_q  # Channels in q layer (~actions)
    state_batch_size = config.statebatchsize  # k+1 state inputs for each channel

    def small_random(*shape):
        # Gaussian initial values scaled by 0.01.
        return tf.Variable(np.random.randn(*shape) * 0.01, dtype=tf.float32)

    # Creation order is kept so seeded runs draw the same initial values.
    bias = small_random(1, 1, 1, ch_h)
    # weights from inputs to q layer (~reward in Bellman equation)
    w0 = small_random(3, 3, ch_i, ch_h)
    w1 = small_random(1, 1, ch_h, 1)
    w = small_random(3, 3, 1, ch_q)
    # feedback weights from v layer into q layer (~transition probabilities in Bellman equation)
    w_fb = small_random(3, 3, 1, ch_q)
    w_o = small_random(ch_q, 8)

    # initial conv layer over image+reward prior
    h = conv2d_flipkernel(X, w0, name="h0") + bias

    r = conv2d_flipkernel(h, w1, name="r")
    q = conv2d_flipkernel(r, w, name="q")
    v = tf.reduce_max(q, axis=3, keep_dims=True, name="v")

    # k - 1 further sweeps: stack reward and value, convolve, max over actions.
    for _ in range(k - 1):
        reward_and_value = tf.concat([r, v], 3)
        stacked_weights = tf.concat([w, w_fb], 2)
        q = conv2d_flipkernel(reward_and_value, stacked_weights, name="q")
        v = tf.reduce_max(q, axis=3, keep_dims=True, name="v")

    # do one last convolution
    q = conv2d_flipkernel(tf.concat([r, v], 3),
                          tf.concat([w, w_fb], 2), name="q")

    # CHANGE TO THEANO ORDERING
    # Since we are selecting over channels, it becomes easier to work with
    # the tensor when it is in NCHW format vs NHWC
    q = tf.transpose(q, perm=[0, 3, 1, 2])

    # Select the conv-net channels at the state position (S1,S2).
    # This intuitively corresponds to each channel representing an action, and the convnet the Q function.
    # The tricky thing is we want to select the same (S1,S2) position *for each* channel and for each sample
    # TODO: performance can be improved here by substituting expensive
    #       transpose calls with better indexing for gather_nd
    bs = tf.shape(q)[0]
    # Sample index repeated state_batch_size times: 0,0,...,1,1,...
    sample_column = tf.reshape(tf.range(bs), [-1, 1])
    rprn = tf.reshape(tf.tile(sample_column, [1, state_batch_size]), [-1])
    ins1 = tf.cast(tf.reshape(S1, [-1]), tf.int32)
    ins2 = tf.cast(tf.reshape(S2, [-1]), tf.int32)
    # One (s1, s2, sample) triple per row.
    idx_in = tf.transpose(tf.stack([ins1, ins2, rprn]), [1, 0])
    q_by_position = tf.transpose(q, [2, 3, 0, 1])
    q_out = tf.gather_nd(q_by_position, idx_in, name="q_out")

    # add logits
    logits = tf.matmul(q_out, w_o)
    # softmax output weights
    output = tf.nn.softmax(logits, name="output")
    return logits, output
def kl(self, other):
    """Return KL(self || other) for categorical distributions over axis 1.

    ``self.inputs`` and ``other.inputs`` are used as unnormalised log
    probabilities; the row maximum is subtracted before exponentiation for
    numerical stability.
    """
    own_peak = tf.reduce_max(self.inputs, reduction_indices=[1], keep_dims=True)
    a0 = self.inputs - own_peak
    other_peak = tf.reduce_max(other.inputs, reduction_indices=[1], keep_dims=True)
    a1 = other.inputs - other_peak

    ea0 = tf.exp(a0)
    ea1 = tf.exp(a1)
    z0 = tf.reduce_sum(ea0, reduction_indices=[1], keep_dims=True)
    z1 = tf.reduce_sum(ea1, reduction_indices=[1], keep_dims=True)

    # Softmax probabilities of `self`, weighting the log-probability gap.
    p0 = ea0 / z0
    log_gap = a0 - tf.log(z0) - a1 + tf.log(z1)
    return tf.reduce_sum(p0 * log_gap, reduction_indices=[1])
def __init__(self, reuse=False, trainable=True):
    """Build the value network, its loss, summaries and (optionally) train op.

    Changes relative to the previous version: a summary filter list that was
    overwritten on the very next statement and an unused ``batch_size``
    tensor were removed, and the input comment now matches the placeholder
    shape.

    Args:
        reuse: passed to the "shared" variable scope; summaries for the
            shared network are only added when it is false.
        trainable: when true, an RMSProp optimizer and ``train_op`` are
            created.
    """
    # Placeholders for our input
    # Input is a batch of uint8 tensors of shape 84 x 84 x 4
    self.states = tf.placeholder(shape=[None, 84, 84, 4], dtype=tf.uint8, name="X")
    # The TD target value
    self.targets = tf.placeholder(shape=[None], dtype=tf.float32, name="y")

    # Scale pixel values to [0, 1].
    X = tf.to_float(self.states) / 255.0

    # Graph shared with Value Net
    with tf.variable_scope("shared", reuse=reuse):
        fc1 = build_shared_network(X, add_summaries=(not reuse))

    with tf.variable_scope("value_net"):
        self.logits = tf.contrib.layers.fully_connected(
            inputs=fc1,
            num_outputs=1,
            activation_fn=None)
        self.logits = tf.squeeze(self.logits, squeeze_dims=[1], name="logits")

        self.losses = tf.squared_difference(self.logits, self.targets)
        self.loss = tf.reduce_sum(self.losses, name="loss")

        self.predictions = {
            "logits": self.logits
        }

        # Summaries
        prefix = tf.get_variable_scope().name
        tf.scalar_summary(self.loss.name, self.loss)
        tf.scalar_summary("{}/max_value".format(prefix), tf.reduce_max(self.logits))
        tf.scalar_summary("{}/min_value".format(prefix), tf.reduce_min(self.logits))
        tf.scalar_summary("{}/mean_value".format(prefix), tf.reduce_mean(self.logits))
        tf.scalar_summary("{}/reward_max".format(prefix), tf.reduce_max(self.targets))
        tf.scalar_summary("{}/reward_min".format(prefix), tf.reduce_min(self.targets))
        tf.scalar_summary("{}/reward_mean".format(prefix), tf.reduce_mean(self.targets))
        tf.histogram_summary("{}/reward_targets".format(prefix), self.targets)
        tf.histogram_summary("{}/values".format(prefix), self.logits)

        if trainable:
            # self.optimizer = tf.train.AdamOptimizer(1e-4)
            self.optimizer = tf.train.RMSPropOptimizer(0.00025, 0.99, 0.0, 1e-6)
            self.grads_and_vars = self.optimizer.compute_gradients(self.loss)
            # Drop variables the loss does not depend on.
            self.grads_and_vars = [[grad, var] for grad, var in self.grads_and_vars if grad is not None]
            self.train_op = self.optimizer.apply_gradients(
                self.grads_and_vars,
                global_step=tf.contrib.framework.get_global_step())

    # Merge only the summaries whose name contains the enclosing scope name.
    var_scope_name = tf.get_variable_scope().name
    summary_ops = tf.get_collection(tf.GraphKeys.SUMMARIES)
    sumaries = [s for s in summary_ops if var_scope_name in s.name]
    self.summaries = tf.merge_summary(sumaries)
def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True, padding=DEFAULT_PADDING, group=1, trainable=True):
    """Add a (optionally grouped) 2-D convolution layer with summaries.

    Changes relative to the previous version: the Python 2 ``print``
    statements are now calls (valid on both Python 2 and 3), and the
    per-group input channel count uses ``//`` because the kernel shape
    needs an integer quotient.

    Args:
        input: input tensor, or a tuple whose first element is the tensor.
        k_h, k_w: kernel height and width.
        c_o: number of output channels.
        s_h, s_w: strides along height and width.
        name: variable scope for the layer; also the name of the output op.
        relu: apply ReLU after the bias when true.
        padding: padding mode, checked by ``self.validate_padding``.
        group: number of channel groups; must divide both the input
            channel count and ``c_o``.
        trainable: forwarded to ``self.make_var``.

    Returns:
        The layer output tensor.
    """
    print(name)
    if isinstance(input, tuple):
        input = input[0]
    self.validate_padding(padding)
    c_i = input.get_shape()[-1]
    print(c_i)
    print(input.get_shape().as_list())
    assert c_i%group==0
    assert c_o%group==0
    convolve = lambda i, k: tf.nn.conv2d(i, k, [1, s_h, s_w, 1], padding=padding)

    def summarize(tensor):
        # mean / stddev / max / min scalars plus a histogram of `tensor`.
        mean = tf.reduce_mean(tensor)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(tensor- mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(tensor))
        tf.summary.scalar('min', tf.reduce_min(tensor))
        tf.summary.histogram('histogram', tensor)

    with tf.variable_scope(name) as scope:
        init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01)
        init_biases = tf.constant_initializer(0.0)
        kernel = self.make_var('weights', [k_h, k_w, c_i // group, c_o], init_weights, trainable)
        biases = self.make_var('biases', [c_o], init_biases, trainable)

        with tf.name_scope('summaries'):
            with tf.name_scope('weights'):
                summarize(kernel)
            with tf.name_scope('biases'):
                summarize(biases)

        if group==1:
            conv = convolve(input, kernel)
        else:
            # NOTE(review): (axis, num, value) / (axis, values) is the
            # pre-1.0 argument order of tf.split / tf.concat, kept as in
            # the original - confirm the TensorFlow version.
            input_groups = tf.split(3, group, input)
            kernel_groups = tf.split(3, group, kernel)
            output_groups = [convolve(i, k) for i,k in zip(input_groups, kernel_groups)]
            conv = tf.concat(3, output_groups)
        if relu:
            bias = tf.nn.bias_add(conv, biases)
            return tf.nn.relu(bias, name=scope.name)
        return tf.nn.bias_add(conv, biases, name=scope.name)
def _match_when_rows_are_non_empty():
    """Performs matching when the rows of similarity matrix are non empty.

    Reads ``similarity_matrix``, ``valid_rows`` and ``self`` from the
    enclosing scope.

    Returns:
      matches: int32 tensor indicating the row each column matches to;
        -1 and -2 mark columns relabelled by the threshold rules below.
    """
    # Best row for every column.
    matches = tf.argmax(similarity_matrix, 0, output_type=tf.int32)

    if self._matched_threshold is not None:
        # Boolean indicators of columns whose best score is too low.
        best_scores = tf.reduce_max(similarity_matrix, 0)
        below_unmatched = tf.greater(self._unmatched_threshold, best_scores)
        in_between = tf.logical_and(
            tf.greater_equal(best_scores, self._unmatched_threshold),
            tf.greater(self._matched_threshold, best_scores))

        # The flag decides which of the two groups gets -1 and which -2.
        if self._negatives_lower_than_unmatched:
            below_label, between_label = -1, -2
        else:
            below_label, between_label = -2, -1
        matches = self._set_values_using_indicator(
            matches, below_unmatched, below_label)
        matches = self._set_values_using_indicator(
            matches, in_between, between_label)

    if not self._force_match_for_each_row:
        return matches

    # Every valid row claims its best column, overriding the result above.
    matrix_shape = shape_utils.combined_static_and_dynamic_shape(
        similarity_matrix)
    best_column_per_row = tf.argmax(similarity_matrix, 1,
                                    output_type=tf.int32)
    row_mask = tf.cast(tf.expand_dims(valid_rows, axis=-1), dtype=tf.float32)
    claims = (
        tf.one_hot(best_column_per_row, depth=matrix_shape[1]) * row_mask)
    claiming_row = tf.argmax(claims, 0, output_type=tf.int32)
    column_is_claimed = tf.cast(tf.reduce_max(claims, 0), tf.bool)
    return tf.where(column_is_claimed, claiming_row, matches)
def getTiles(img_arr):
    """Find and slice 64 chess tiles from image in 3D Matrix

    Gradient projections of the image are computed in TensorFlow, handed to
    ``getChessLines`` to locate the board lines, and the tiles are cut with
    ``getChessTiles``. If the first attempt finds no match, the thresholds
    are relaxed to 90%, 80%, 70% and 60% in turn.

    The local session is now always closed; previously every call leaked it.

    Args:
        img_arr: 2-D grayscale image array (its ``shape[0]`` / ``shape[1]``
            are used for normalisation).

    Returns:
        The result of ``getChessTiles`` on a match, otherwise ``[]``.
    """
    # Get our grayscale image matrix
    A = tf.Variable(img_arr)

    # X & Y gradients
    Dx = gradientx(A)
    Dy = gradienty(A)

    Dx_pos = tf.clip_by_value(Dx, 0., 255., name="dx_positive")
    Dx_neg = tf.clip_by_value(Dx, -255., 0., name='dx_negative')
    Dy_pos = tf.clip_by_value(Dy, 0., 255., name="dy_positive")
    Dy_neg = tf.clip_by_value(Dy, -255., 0., name='dy_negative')

    # 1-D amplitude of hough transform of gradients about X & Y axes
    # Chessboard lines have strong positive and negative gradients within an axis
    hough_Dx = tf.reduce_sum(Dx_pos, 0) * tf.reduce_sum(-Dx_neg, 0) / (img_arr.shape[0]*img_arr.shape[0])
    hough_Dy = tf.reduce_sum(Dy_pos, 1) * tf.reduce_sum(-Dy_neg, 1) / (img_arr.shape[1]*img_arr.shape[1])

    # Slightly closer to 3/5 threshold, since they're such strong responses
    hough_Dx_thresh = tf.reduce_max(hough_Dx) * 3/5
    hough_Dy_thresh = tf.reduce_max(hough_Dy) * 3/5

    # Transition from TensorFlow to normal values (todo, do TF right)
    # Only A is initialised: tf.initialize_all_variables() would also reset
    # CNN weights, so be selective.
    sess = tf.Session()
    try:
        sess.run(tf.initialize_variables([A], name='getTiles_init'))
        hdx, hdy, hdx_thresh, hdy_thresh = sess.run(
            [hough_Dx, hough_Dy, hough_Dx_thresh, hough_Dy_thresh])
    finally:
        # Release the session's resources even if evaluation fails.
        sess.close()

    # Get chess lines (try a few sets)
    lines_x, lines_y, is_match = getChessLines(hdx, hdy, hdx_thresh, hdy_thresh)
    for percentage in np.array([0.9, 0.8, 0.7, 0.6]):
        if is_match:
            break
        else:
            print("Trying %d%% of threshold" % (100*percentage))
            lines_x, lines_y, is_match = getChessLines(
                hdx, hdy, hdx_thresh * percentage, hdy_thresh * percentage)

    # Get the tileset
    if is_match:
        return getChessTiles(img_arr, lines_x, lines_y)
    else:
        print("\tNo Match, lines found (dx/dy):", lines_x, lines_y)
        return []  # No match, no tiles
def __init__(self, config, word_mat=None, char_mat=None, test=False):
    """Read hyper-parameters, create inputs, then build and compile the model.

    Args:
        config: mapping holding the hyper-parameters read below.
        word_mat: initial value of the frozen word embedding matrix.
        char_mat: initial value of the trainable character embedding matrix.
        test: when true, the context/question limits are fixed to 1000/100
            instead of being taken from ``config``.
    """
    # hyper-parameter
    self.char_dim = config['char_dim']
    self.cont_limit = 1000 if test else config['cont_limit']
    self.ques_limit = 100 if test else config['ques_limit']
    for key in ('char_limit', 'ans_limit', 'filters', 'num_heads',
                'batch_size', 'l2_norm', 'decay', 'learning_rate',
                'grad_clip'):
        setattr(self, key, config[key])
    self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

    # embedding layer
    self.word_mat = tf.get_variable(
        "word_mat", initializer=tf.constant(word_mat, dtype=tf.float32),
        trainable=False)
    self.char_mat = tf.get_variable(
        "char_mat", initializer=tf.constant(char_mat, dtype=tf.float32),
        trainable=True)

    # input tensor
    context_shape = [None, self.cont_limit]
    question_shape = [None, self.ques_limit]
    self.contw_input_ = tf.placeholder(tf.int32, context_shape,"context_word")
    self.quesw_input_ = tf.placeholder(tf.int32, question_shape, "question_word")
    self.contc_input_ = tf.placeholder(
        tf.int32, context_shape + [self.char_limit], "context_char")
    self.quesc_input_ = tf.placeholder(
        tf.int32, question_shape + [self.char_limit], "question_char")
    self.y_start_ = tf.placeholder(tf.int32, context_shape, "answer_start_index")
    self.y_end_ = tf.placeholder(tf.int32, context_shape, "answer_end_index")

    # Non-zero word ids count as real tokens.
    self.c_mask = tf.cast(self.contw_input_, tf.bool)
    self.q_mask = tf.cast(self.quesw_input_, tf.bool)
    self.cont_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
    self.ques_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

    # slice for maxlen in each batch
    self.c_maxlen = tf.reduce_max(self.cont_len)
    self.q_maxlen = tf.reduce_max(self.ques_len)

    def crop_words(tensor, length):
        return tf.slice(tensor, [0, 0], [-1, length])

    def crop_chars(tensor, length):
        return tf.slice(tensor, [0, 0, 0], [-1, length, self.char_limit])

    self.contw_input = crop_words(self.contw_input_, self.c_maxlen)
    self.quesw_input = crop_words(self.quesw_input_, self.q_maxlen)
    self.c_mask = crop_words(self.c_mask, self.c_maxlen)
    self.q_mask = crop_words(self.q_mask, self.q_maxlen)
    self.contc_input = crop_chars(self.contc_input_, self.c_maxlen)
    self.quesc_input = crop_chars(self.quesc_input_, self.q_maxlen)
    self.y_start = crop_words(self.y_start_, self.c_maxlen)
    self.y_end = crop_words(self.y_end_, self.c_maxlen)

    # init model & compile
    self.build_model()
    total_params()
    self.complie()
def _summary_vis(m, batch_size, num_steps, arop_full_summary_iters):
    """Assemble the tensors used for visual summaries.

    For every entry of ``m.value_ops`` the value, occupancy, confidence,
    goal and map tensors are reduced to one channel, concatenated across
    entries, and only the middle time step (``int(num_steps / 2)``) is kept
    to save memory.

    ``np.int`` (removed in NumPy 1.24) is replaced by the builtin ``int``
    it aliased, and the local no longer shadows the builtin ``id``.

    Args:
        m: model object exposing ``value_ops``, ``occupancys``, ``confs``,
            ``ego_map_ops`` and ``input_tensors['step']``.
        batch_size: leading dimension used when reshaping.
        num_steps: number of time steps; the middle one is visualised.
        arop_full_summary_iters: iteration count stored alongside the ops.

    Returns:
        ``(arop, arop_summary_iters, arop_eval_fns)``, three single-element
        lists holding the op group, its iteration count and ``_vis``.
    """
    arop = []
    arop_summary_iters = []
    arop_eval_fns = []
    vis_value_ops = []
    vis_goal_ops = []
    vis_map_ops = []
    vis_occupancy_ops = []
    vis_conf_ops = []

    for i, val_op in enumerate(m.value_ops):
        vis_value_op = tf.reduce_mean(tf.abs(val_op), axis=3, keep_dims=True)
        vis_value_ops.append(vis_value_op)

        vis_occupancy_op = tf.reduce_mean(tf.abs(m.occupancys[i]), 3, True)
        vis_occupancy_ops.append(vis_occupancy_op)

        vis_conf_op = tf.reduce_max(tf.abs(m.confs[i]), axis=3, keep_dims=True)
        vis_conf_ops.append(vis_conf_op)

        ego_goal_imgs_i_op = m.input_tensors['step']['ego_goal_imgs_{:d}'.format(i)]
        vis_goal_op = tf.reduce_max(ego_goal_imgs_i_op, 4, True)
        vis_goal_ops.append(vis_goal_op)

        vis_map_op = tf.reduce_mean(tf.abs(m.ego_map_ops[i]), 4, True)
        vis_map_ops.append(vis_map_op)

    vis_goal_ops = tf.concat(vis_goal_ops, 4)
    vis_map_ops = tf.concat(vis_map_ops, 4)
    vis_value_ops = tf.concat(vis_value_ops, 3)
    vis_occupancy_ops = tf.concat(vis_occupancy_ops, 3)
    vis_conf_ops = tf.concat(vis_conf_ops, 3)

    def split_leading_axis(tensor):
        # Reshape (d0, ...) into (batch_size, -1, ...).
        trailing = tf.unstack(tf.shape(tensor))[1:]
        return tf.reshape(tensor, shape=[batch_size, -1] + trailing)

    vis_value_ops = split_leading_axis(vis_value_ops)
    vis_conf_ops = split_leading_axis(vis_conf_ops)
    vis_occupancy_ops = split_leading_axis(vis_occupancy_ops)

    # Save memory, only return time steps that need to be visualized, factor of
    # 32 CPU memory saving.
    step_index = int(num_steps/2)

    def middle_step(tensor):
        return tf.expand_dims(tensor[:,step_index,:,:,:], axis=1)

    vis_goal_ops = middle_step(vis_goal_ops)
    vis_map_ops = middle_step(vis_map_ops)
    vis_value_ops = middle_step(vis_value_ops)
    vis_conf_ops = middle_step(vis_conf_ops)
    vis_occupancy_ops = middle_step(vis_occupancy_ops)

    arop += [[vis_value_ops, vis_goal_ops, vis_map_ops, vis_occupancy_ops,
              vis_conf_ops]]
    arop_summary_iters += [arop_full_summary_iters]
    arop_eval_fns += [_vis]
    return arop, arop_summary_iters, arop_eval_fns
def print_act_stats(x, _str=""):
    """Attach a ``tf.Print`` of activation statistics to ``x``.

    Nothing is added (``x`` is returned as is) when the module flag
    ``do_print_act_stats`` is false or when ``hvd.rank()`` is not 0.

    Moments are taken over axis 0 for rank-1 input and over every axis but
    the last otherwise. That reproduces the previous behaviour for ranks
    1, 2 and 4 and extends it to other ranks, which used to fail with an
    ``UnboundLocalError``.

    Args:
        x: tensor to inspect.
        _str: label placed in brackets in front of ``x.name``.

    Returns:
        ``x`` itself, or ``tf.Print(x, ...)`` printing min/mean/max of the
        mean followed by min/mean/max of the standard deviation.
    """
    if not do_print_act_stats:
        return x
    if hvd.rank() != 0:
        return x

    rank = len(x.get_shape())
    axes = [0] if rank == 1 else list(range(rank - 1))
    x_mean, x_var = tf.nn.moments(x, axes, keep_dims=True)
    x_std = tf.sqrt(x_var)

    stats = [tf.reduce_min(x_mean), tf.reduce_mean(x_mean), tf.reduce_max(x_mean),
             tf.reduce_min(x_std), tf.reduce_mean(x_std), tf.reduce_max(x_std)]
    return tf.Print(x, stats, "["+_str+"] "+x.name)
def __init__(self, config, batch, word_mat=None, char_mat=None, trainable=True, opt=True):
    """Wire up the input pipeline, the model graph and (optionally) training.

    Args:
        config: Settings object. Reads ``batch_size``, ``char_limit``,
            ``para_limit``, ``ques_limit`` and ``grad_clip``.
        batch: Iterator whose ``get_next()`` yields the seven input tensors
            (c, q, ch, qh, y1, y2, qa_id).
        word_mat: Initial value of the non-trainable ``word_mat`` variable.
        char_mat: Initial value of the ``char_mat`` variable.
        trainable: When true, also build the Adadelta optimiser and the
            gradient-clipped ``train_op``.
        opt: When true, crop every input to the longest context / question in
            the batch; otherwise use the fixed limits from ``config``.
    """
    def _chars_per_token(char_ids):
        # Count the non-zero entries along axis 2 and flatten the result.
        nonzero = tf.cast(tf.cast(char_ids, tf.bool), tf.int32)
        return tf.reshape(tf.reduce_sum(nonzero, axis=2), [-1])

    self.config = config
    self.global_step = tf.get_variable(
        'global_step', shape=[], dtype=tf.int32,
        initializer=tf.constant_initializer(0), trainable=False)

    (self.c, self.q, self.ch, self.qh,
     self.y1, self.y2, self.qa_id) = batch.get_next()

    self.is_train = tf.get_variable(
        "is_train", shape=[], dtype=tf.bool, trainable=False)
    self.word_mat = tf.get_variable(
        "word_mat",
        initializer=tf.constant(word_mat, dtype=tf.float32),
        trainable=False)
    self.char_mat = tf.get_variable(
        "char_mat", initializer=tf.constant(char_mat, dtype=tf.float32))

    # Non-zero ids are treated as real tokens.
    self.c_mask = tf.cast(self.c, tf.bool)
    self.q_mask = tf.cast(self.q, tf.bool)
    self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
    self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

    if not opt:
        self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit
    else:
        rows, chars = config.batch_size, config.char_limit
        self.c_maxlen = tf.reduce_max(self.c_len)
        self.q_maxlen = tf.reduce_max(self.q_len)

        context_extent = [rows, self.c_maxlen]
        question_extent = [rows, self.q_maxlen]
        self.c = tf.slice(self.c, [0, 0], context_extent)
        self.q = tf.slice(self.q, [0, 0], question_extent)
        self.c_mask = tf.slice(self.c_mask, [0, 0], context_extent)
        self.q_mask = tf.slice(self.q_mask, [0, 0], question_extent)
        self.ch = tf.slice(self.ch, [0, 0, 0], [rows, self.c_maxlen, chars])
        self.qh = tf.slice(self.qh, [0, 0, 0], [rows, self.q_maxlen, chars])
        # Both answer-position tensors are cropped to the context length.
        self.y1 = tf.slice(self.y1, [0, 0], context_extent)
        self.y2 = tf.slice(self.y2, [0, 0], context_extent)

    self.ch_len = _chars_per_token(self.ch)
    self.qh_len = _chars_per_token(self.qh)

    self.ready()

    if trainable:
        self.lr = tf.get_variable(
            "lr", shape=[], dtype=tf.float32, trainable=False)
        self.opt = tf.train.AdadeltaOptimizer(
            learning_rate=self.lr, epsilon=1e-6)
        grads_and_vars = self.opt.compute_gradients(self.loss)
        raw_grads, train_vars = zip(*grads_and_vars)
        clipped_grads, _ = tf.clip_by_global_norm(raw_grads, config.grad_clip)
        self.train_op = self.opt.apply_gradients(
            zip(clipped_grads, train_vars), global_step=self.global_step)
def entropy(self):
    """Return the entropy of the softmax of ``self.inputs`` along axis 1.

    The row maximum is subtracted before exponentiating. The keyword used to
    keep the reduced axis depends on the module-level ``use_tf150_api`` flag
    (``keepdims`` when set, ``keep_dims`` otherwise).
    """
    keep_kwarg = {'keepdims': True} if use_tf150_api else {'keep_dims': True}

    shifted = self.inputs - tf.reduce_max(
        self.inputs, reduction_indices=[1], **keep_kwarg)
    exp_shifted = tf.exp(shifted)
    partition = tf.reduce_sum(
        exp_shifted, reduction_indices=[1], **keep_kwarg)
    probs = exp_shifted / partition
    # sum_i p_i * (log Z - a_i) equals -sum_i p_i * log p_i.
    return tf.reduce_sum(probs * (tf.log(partition) - shifted),
                         reduction_indices=[1])
def fc(self, input, num_out, name, relu=True, trainable=True):
    """Add a fully connected layer, with summaries for its parameters.

    Args:
        input: Input tensor, or a tuple whose first element is used. A rank-4
            input is transposed with permutation [0, 3, 1, 2] and flattened
            to ``[-1, dim]``; any other input is used as-is with ``dim``
            taken from its last static dimension.
        num_out: Number of output units.
        name: Variable scope of the layer. ``'bbox_pred'`` gets a smaller
            weight initialiser (stddev 0.001 instead of 0.01).
        relu: Use ``tf.nn.relu_layer`` when true, ``tf.nn.xw_plus_b``
            otherwise.
        trainable: Forwarded to ``self.make_var`` for both variables.

    Returns:
        The layer output tensor.
    """
    def _summarize(var):
        # Scalar mean / stddev / max / min plus a histogram of ``var``.
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        with tf.name_scope('stddev'):
            stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)

    # Call form rather than the Python 2 print statement, so the module also
    # parses on Python 3; output is the same on both.
    print(name)
    with tf.variable_scope(name) as scope:
        # only use the first input
        if isinstance(input, tuple):
            input = input[0]

        input_shape = input.get_shape()
        if input_shape.ndims == 4:
            dim = 1
            for d in input_shape[1:].as_list():
                dim *= d
            feed_in = tf.reshape(tf.transpose(input, [0, 3, 1, 2]), [-1, dim])
        else:
            feed_in, dim = (input, int(input_shape[-1]))

        weight_stddev = 0.001 if name == 'bbox_pred' else 0.01
        init_weights = tf.truncated_normal_initializer(
            0.0, stddev=weight_stddev)
        init_biases = tf.constant_initializer(0.0)

        weights = self.make_var('weights', [dim, num_out], init_weights,
                                trainable)
        biases = self.make_var('biases', [num_out], init_biases, trainable)

        with tf.name_scope('summaries'):
            with tf.name_scope('weights'):
                _summarize(weights)
            with tf.name_scope('biases'):
                _summarize(biases)

        op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b
        fc = op(feed_in, weights, biases, name=scope.name)
        return fc
def attend(x, sequence_length=None, method="ave", context=None, feature_dim=None,
           mask_zero=False, maxlen=None, epsilon=1e-8, bn=True, training=False,
           seed=0, reuse=True, name="attend"):
    """Pool ``x`` over axis 1 with the chosen method.

    Args:
        x: Input tensor; axis 1 is reduced.
            NOTE(review): presumably [batch, steps, features] -- confirm.
        sequence_length: Lengths passed to ``tf.sequence_mask`` when
            ``mask_zero`` is set (and forwarded to ``attention``).
        method: One of ``"ave"``, ``"sum"``, ``"max"`` or ``"attention"``.
        context: Optional tensor tiled over the step axis and concatenated to
            ``x`` before scoring (``"attention"`` only).
        feature_dim: Forwarded to ``attention``.
        mask_zero: When true, steps beyond ``sequence_length`` are excluded.
        maxlen: Forwarded to ``tf.sequence_mask``.
        epsilon: Added to the valid-step count in the masked average.
        bn: When true, the result goes through a fresh ``BatchNormalization``
            layer called with ``training=False``.
        training, reuse, name: Accepted but not used by this function.
        seed: Forwarded to ``attention``.

    Returns:
        The pooled tensor.
    """
    def _float_step_mask():
        # None * step_dim * 1, as float32
        valid = tf.sequence_mask(sequence_length, maxlen)
        valid = tf.reshape(valid, (-1, tf.shape(x)[1], 1))
        return tf.cast(valid, tf.float32)

    if method == "ave":
        if not mask_zero:
            z = tf.reduce_mean(x, axis=1)
        else:
            step_mask = _float_step_mask()
            z = tf.reduce_sum(x * step_mask, axis=1)
            valid_steps = tf.reduce_sum(step_mask, axis=1)
            # in some cases especially in the early stages of training the
            # sum may be almost zero
            z = z / tf.cast(valid_steps + epsilon, tf.float32)
    elif method == "sum":
        if not mask_zero:
            z = tf.reduce_sum(x, axis=1)
        else:
            z = tf.reduce_sum(x * _float_step_mask(), axis=1)
    elif method == "max":
        if not mask_zero:
            z = tf.reduce_max(x, axis=1)
        else:
            # None * step_dim, broadcast over the last axis of x
            step_mask = tf.sequence_mask(sequence_length, maxlen)
            step_mask = tf.expand_dims(step_mask, axis=-1)
            step_mask = tf.tile(step_mask, (1, 1, tf.shape(x)[2]))
            is_padding = tf.equal(step_mask, tf.zeros_like(step_mask))
            minus_inf = tf.ones_like(x) * -np.inf
            # if masked assume value is -inf
            z = tf.reduce_max(tf.where(is_padding, minus_inf, x), axis=1)
    elif method == "attention":
        if context is None:
            y = x
        else:
            step_dim = tf.shape(x)[1]
            tiled_context = tf.tile(tf.expand_dims(context, axis=1),
                                    [1, step_dim, 1])
            y = tf.concat([x, tiled_context], axis=-1)
        weights = attention(y, feature_dim, sequence_length, mask_zero,
                            maxlen, seed=seed)
        z = tf.reduce_sum(x * weights, axis=1)

    if bn:
        # training=False has slightly better performance
        z = tf.layers.BatchNormalization()(z, training=False)
    return z
def variational_lowerbound(x, encoder, decoder, num_samples, batch_size,
                           alpha = 1.0, backward_pass = '******'):
    """Compute the loss function of the VR (variational Renyi) lower bound.

    ``x`` is tiled ``num_samples`` times, pushed through every encoder layer
    (accumulating log q) and scored by every decoder layer in reverse order
    (accumulating log p). The prior term is evaluated on the last encoder
    output using the probability type of the last encoder layer.

    Args:
        x: Input batch.
        encoder: Object whose ``S_layers`` each provide
            ``encode_and_log_prob`` and ``get_prob_type``.
        decoder: Object whose ``S_layers`` each provide
            ``encode_and_log_prob``.
        num_samples: Number of samples drawn per input.
        batch_size: Batch size used to reshape the log-weights to
            ``[num_samples, batch_size]``.
        alpha: Renyi order. Values within 0.01 of 1.0 use the plain mean of
            the log-weights.
        backward_pass: ``'max'`` / ``'min'`` keep only the largest / smallest
            log-weight per input; any other value selects by ``alpha``.

    Returns:
        Scalar tensor holding the lower bound.
    """
    logpxz = 0.0
    logqzx = 0.0
    L = len(encoder.S_layers)
    x_rep = tf.tile(x, [num_samples, 1])

    # do encoding
    # `range` rather than the Python-2-only `xrange`, so this runs on both
    # Python 2 and Python 3.
    layer_input = x_rep
    samples = []
    for layer_idx in range(L):
        output, logq = encoder.S_layers[layer_idx].encode_and_log_prob(
            layer_input)
        logqzx = logqzx + logq
        samples.append(output)
        layer_input = output

    # do decoding: walk the samples top-down, ending at the tiled input
    samples = list(reversed(samples))
    samples.append(x_rep)
    for layer_idx in range(L):
        _, logp = decoder.S_layers[layer_idx].encode_and_log_prob(
            samples[layer_idx], eval_output = samples[layer_idx + 1])
        logpxz = logpxz + logp

    # Prior on the top-most latent (output of the last encoder layer).
    logpz = log_prior(output, encoder.S_layers[L - 1].get_prob_type())
    logF = logpz + logpxz - logqzx

    if backward_pass == 'max':
        logF = tf.reshape(logF, [num_samples, batch_size])
        logF = tf.reduce_max(logF, 0)
        lowerbound = tf.reduce_mean(logF)
    elif backward_pass == 'min':
        logF = tf.reshape(logF, [num_samples, batch_size])
        logF = tf.reduce_min(logF, 0)
        lowerbound = tf.reduce_mean(logF)
    elif np.abs(alpha - 1.0) < 10e-3:
        # NOTE: 10e-3 is 0.01, not 0.001.
        lowerbound = tf.reduce_mean(logF)
    else:
        # Log-mean-exp over the sample axis, stabilised by subtracting the
        # per-input maximum and clipping before the log.
        logF = tf.reshape(logF, [num_samples, batch_size])
        logF = logF * (1 - alpha)
        logF_max = tf.reduce_max(logF, 0)
        logF = tf.log(tf.clip_by_value(
            tf.reduce_mean(tf.exp(logF - logF_max), 0), 1e-9, np.inf))
        logF = (logF + logF_max) / (1 - alpha)
        lowerbound = tf.reduce_mean(logF)
    return lowerbound
def body1(self, num, object_num, loss, predict, labels, nilboy):
    """Loop body that adds the loss of one ground-truth object.

    Row ``num`` of ``labels`` is compared against ``predict`` and the four
    partial losses are added to the running totals in ``loss``.

    Args:
        num: Index of the label row handled by this iteration.
        object_num: Passed through unchanged.
        loss: Sequence of four running losses
            (class, object, no-object, coordinate).
        predict: 3-D tensor whose last axis is sliced as
            ``num_classes`` class scores, then ``boxes_per_cell``
            confidences, then ``boxes_per_cell * 4`` box values.
        labels: Tensor of label rows (x_center, y_center, w, h, class).
        nilboy: Ignored on input; replaced by the responsibility tensor ``I``
            in the returned tuple.

    Returns:
        ``(num + 1, object_num, [updated four losses], predict, labels, I)``.
    """
    label = labels[num:num + 1, :]
    label = tf.reshape(label, [-1])

    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
    # Box extent converted from pixels to (fractional) grid-cell units.
    min_x = (label[0] - label[2] / 2) / (self.image_size / self.cell_size)
    max_x = (label[0] + label[2] / 2) / (self.image_size / self.cell_size)

    min_y = (label[1] - label[3] / 2) / (self.image_size / self.cell_size)
    max_y = (label[1] + label[3] / 2) / (self.image_size / self.cell_size)

    # Snap outwards so every cell touched by the box is included.
    min_x = tf.floor(min_x)
    min_y = tf.floor(min_y)

    max_x = tf.ceil(max_x)
    max_y = tf.ceil(max_y)

    # Block of ones the size of the covered cells, zero-padded out to the
    # full grid.
    temp = tf.cast(tf.pack([max_y - min_y, max_x - min_x]), dtype=tf.int32)
    objects = tf.ones(temp, tf.float32)

    temp = tf.cast(
        tf.pack(
            [min_y, self.cell_size - max_y, min_x, self.cell_size - max_x]),
        tf.int32)
    temp = tf.reshape(temp, (2, 2))
    objects = tf.pad(objects, temp, "CONSTANT")

    #calculate objects tensor [CELL_SIZE, CELL_SIZE]
    #calculate responsible tensor [CELL_SIZE, CELL_SIZE]
    # A single 1 at the cell containing the box centre, zeros elsewhere.
    center_x = label[0] / (self.image_size / self.cell_size)
    center_x = tf.floor(center_x)

    center_y = label[1] / (self.image_size / self.cell_size)
    center_y = tf.floor(center_y)

    response = tf.ones([1, 1], tf.float32)

    temp = tf.cast(
        tf.pack([
            center_y, self.cell_size - center_y - 1, center_x,
            self.cell_size - center_x - 1
        ]), tf.int32)
    temp = tf.reshape(temp, (2, 2))
    response = tf.pad(response, temp, "CONSTANT")
    #objects = response

    #calculate iou_predict_truth [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    predict_boxes = predict[:, :, self.num_classes + self.boxes_per_cell:]

    predict_boxes = tf.reshape(
        predict_boxes,
        [self.cell_size, self.cell_size, self.boxes_per_cell, 4])

    # Scale the first two box values by the cell size and the last two by
    # the image size.
    predict_boxes = predict_boxes * [
        self.image_size / self.cell_size, self.image_size / self.cell_size,
        self.image_size, self.image_size
    ]

    # Per-cell offset (pixel position of each cell's origin) added to the
    # first two box values; the last two get no offset.
    base_boxes = np.zeros([self.cell_size, self.cell_size, 4])

    for y in range(self.cell_size):
        for x in range(self.cell_size):
            #nilboy
            base_boxes[y, x, :] = [
                self.image_size / self.cell_size * x,
                self.image_size / self.cell_size * y, 0, 0
            ]
    base_boxes = np.tile(
        np.resize(base_boxes, [self.cell_size, self.cell_size, 1, 4]),
        [1, 1, self.boxes_per_cell, 1])

    predict_boxes = base_boxes + predict_boxes

    iou_predict_truth = self.iou(predict_boxes, label[0:4])
    #calculate C [cell_size, cell_size, boxes_per_cell]
    C = iou_predict_truth * tf.reshape(response, [self.cell_size, self.cell_size, 1])

    #calculate I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    # I marks, inside the responsible cell, the predictor(s) with the
    # highest IoU against the ground truth.
    I = iou_predict_truth * tf.reshape(response, (self.cell_size, self.cell_size, 1))

    max_I = tf.reduce_max(I, 2, keep_dims=True)

    I = tf.cast((I >= max_I), tf.float32) * tf.reshape(
        response, (self.cell_size, self.cell_size, 1))

    #calculate no_I tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    no_I = tf.ones_like(I, dtype=tf.float32) - I

    p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell]

    #calculate truth x,y,sqrt_w,sqrt_h 0-D
    # NOTE: x and y are rebound here; they were loop indices above.
    x = label[0]
    y = label[1]

    sqrt_w = tf.sqrt(tf.abs(label[2]))
    sqrt_h = tf.sqrt(tf.abs(label[3]))
    #sqrt_w = tf.abs(label[2])
    #sqrt_h = tf.abs(label[3])

    #calculate predict p_x, p_y, p_sqrt_w, p_sqrt_h 3-D [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL]
    p_x = predict_boxes[:, :, :, 0]
    p_y = predict_boxes[:, :, :, 1]

    #p_sqrt_w = tf.sqrt(tf.abs(predict_boxes[:, :, :, 2])) * ((tf.cast(predict_boxes[:, :, :, 2] > 0, tf.float32) * 2) - 1)
    #p_sqrt_h = tf.sqrt(tf.abs(predict_boxes[:, :, :, 3])) * ((tf.cast(predict_boxes[:, :, :, 3] > 0, tf.float32) * 2) - 1)
    #p_sqrt_w = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 2]))
    #p_sqrt_h = tf.sqrt(tf.maximum(0.0, predict_boxes[:, :, :, 3]))
    #p_sqrt_w = predict_boxes[:, :, :, 2]
    #p_sqrt_h = predict_boxes[:, :, :, 3]
    # Predicted sizes are clamped to [0, image_size] before the square root.
    p_sqrt_w = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 2])))
    p_sqrt_h = tf.sqrt(
        tf.minimum(self.image_size * 1.0,
                   tf.maximum(0.0, predict_boxes[:, :, :, 3])))

    #calculate truth p 1-D tensor [NUM_CLASSES]
    P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32)

    #calculate predict p_P 3-D tensor [CELL_SIZE, CELL_SIZE, NUM_CLASSES]
    p_P = predict[:, :, 0:self.num_classes]

    #class_loss
    # Applied to every cell covered by the box (objects), not only the
    # responsible cell.
    class_loss = tf.nn.l2_loss(
        tf.reshape(objects, (self.cell_size, self.cell_size, 1)) *
        (p_P - P)) * self.class_scale
    #class_loss = tf.nn.l2_loss(tf.reshape(response, (self.cell_size, self.cell_size, 1)) * (p_P - P)) * self.class_scale

    #object_loss
    object_loss = tf.nn.l2_loss(I * (p_C - C)) * self.object_scale
    #object_loss = tf.nn.l2_loss(I * (p_C - (C + 1.0)/2.0)) * self.object_scale

    #noobject_loss
    #noobject_loss = tf.nn.l2_loss(no_I * (p_C - C)) * self.noobject_scale
    noobject_loss = tf.nn.l2_loss(no_I * (p_C)) * self.noobject_scale

    #coord_loss
    coord_loss = (tf.nn.l2_loss(I * (p_x - x) / (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_y - y) / (self.image_size / self.cell_size)) +
                  tf.nn.l2_loss(I * (p_sqrt_w - sqrt_w)) / self.image_size +
                  tf.nn.l2_loss(I * (p_sqrt_h - sqrt_h)) / self.image_size) * self.coord_scale

    nilboy = I

    return num + 1, object_num, [
        loss[0] + class_loss, loss[1] + object_loss, loss[2] + noobject_loss,
        loss[3] + coord_loss
    ], predict, labels, nilboy
def loss_layer(self, feature_map_i, y_true, anchors):
    """Compute the four loss terms for one detection scale.

    Args:
        feature_map_i: Raw feature map of this scale; axes 1 and 2 are the
            grid height and width.
        y_true: Ground truth, reshaped here to
            ``[-1, grid_h, grid_w, 3, 5 + self._NUM_CLASSES]``
            (box xywh, objectness, class labels).
        anchors: Anchor sizes for this scale.

    Returns:
        Tuple ``(xy_loss, wh_loss, conf_loss, class_loss)``, each summed and
        divided by the batch size.
    """
    def _log_size_ratio(size_ratio):
        # for numerical stability: zero ratios become 1 (log 1 == 0) and the
        # rest is clipped before the log.
        patched = tf.where(condition=tf.equal(size_ratio, 0),
                           x=tf.ones_like(size_ratio),
                           y=size_ratio)
        return tf.log(tf.clip_by_value(patched, 1e-9, 1e9))

    def _objectness_xent():
        return tf.nn.sigmoid_cross_entropy_with_logits(
            labels=object_mask, logits=pred_conf_logits)

    # size in [h, w] format! don't get messed up!
    dynamic_grid = tf.shape(feature_map_i)[1:3]
    static_grid = feature_map_i.shape.as_list()[1:3]

    y_true = tf.reshape(
        y_true,
        [-1, static_grid[0], static_grid[1], 3, 5+self._NUM_CLASSES])

    # the downscale ratio in height and width
    ratio = tf.cast(self.img_size / dynamic_grid, tf.float32)
    # batch size, as float
    batch = tf.cast(tf.shape(feature_map_i)[0], tf.float32)

    (x_y_offset, pred_boxes,
     pred_conf_logits, pred_prob_logits) = self._reorg_layer(feature_map_i,
                                                             anchors)

    # shape: take 416x416 input image and 13*13 feature_map for example:
    # [N, 13, 13, 3, 1]
    object_mask = y_true[..., 4:5]

    # [N, 13, 13, 3, 4] & [N, 13, 13, 3] ==> [V, 4], V: num of true gt box
    gt_boxes = tf.boolean_mask(y_true[..., 0:4],
                               tf.cast(object_mask[..., 0], 'bool'))
    gt_xy = gt_boxes[:, 0:2]
    gt_wh = gt_boxes[:, 2:4]

    # shape: [N, 13, 13, 3, 2]
    pred_box_xy = pred_boxes[..., 0:2]
    pred_box_wh = pred_boxes[..., 2:4]

    # IoU of every prediction with every valid gt box: [N, 13, 13, 3, V]
    iou = self._broadcast_iou(gt_xy, gt_wh, pred_box_xy, pred_box_wh)

    # Predictions whose best IoU is below 0.5 count as background:
    # [N, 13, 13, 3] -> [N, 13, 13, 3, 1]
    ignore_mask = tf.expand_dims(
        tf.cast(tf.reduce_max(iou, axis=-1) < 0.5, tf.float32), -1)

    # xy coordinates inside one cell of the feature map, range 0 ~ 1,
    # shape [N, 13, 13, 3, 2]. ratio is [h, w], hence the reversal.
    true_xy = y_true[..., 0:2] / ratio[::-1] - x_y_offset
    pred_xy = pred_box_xy / ratio[::-1] - x_y_offset

    # log of the width/height relative to the anchors, [N, 13, 13, 3, 2]
    true_tw_th = _log_size_ratio(y_true[..., 2:4] / anchors)
    pred_tw_th = _log_size_ratio(pred_box_wh / anchors)

    # box size punishment:
    # box with smaller area has bigger weight. This is taken from the yolo
    # darknet C source code. shape: [N, 13, 13, 3, 1]
    box_loss_scale = 2. - (y_true[..., 2:3] / tf.cast(self.img_size[1], tf.float32)) * (y_true[..., 3:4] / tf.cast(self.img_size[0], tf.float32))

    xy_loss = tf.reduce_sum(
        tf.square(true_xy - pred_xy) * object_mask * box_loss_scale) / batch
    wh_loss = tf.reduce_sum(
        tf.square(true_tw_th - pred_tw_th) * object_mask * box_loss_scale) / batch

    # Confidence loss: positives always count, negatives only where they are
    # not ignored. shape: [N, 13, 13, 3, 1]
    positive_mask = object_mask
    negative_mask = (1 - object_mask) * ignore_mask
    positive_term = positive_mask * _objectness_xent()
    negative_term = negative_mask * _objectness_xent()
    conf_loss = tf.reduce_sum(positive_term + negative_term) / batch

    # Class loss only on cells that contain an object.
    class_xent = object_mask * tf.nn.sigmoid_cross_entropy_with_logits(
        labels=y_true[..., 5:], logits=pred_prob_logits)
    class_loss = tf.reduce_sum(class_xent) / batch

    return xy_loss, wh_loss, conf_loss, class_loss
def crop_proposal():
    """Draw random crop candidates and pick one valid crop, if any.

    Reads ``boxes`` and ``num_boxes`` from the enclosing scope.

    Returns:
        Tuple ``(use_crop, output_ltrb, output_masks)``: whether any candidate
        was valid, the selected crop as [left, top, right, bottom] (zeros when
        none is valid), and a per-box mask of the boxes whose centre lies
        inside the selected crop.
    """
    num_passes = ssd_constants.NUM_CROP_PASSES

    def rand_vec(minval, maxval):
        # One uniform sample per crop pass, as a column vector.
        return tf.random_uniform(shape=(ssd_constants.NUM_CROP_PASSES, 1),
                                 minval=minval, maxval=maxval,
                                 dtype=tf.float32)

    def tiled_center(i):
        # Box centre along axis i (0: x, 1: y), repeated for every pass.
        return tf.tile(
            0.5 * (boxes[:, i + 0] + boxes[:, i + 2])[tf.newaxis, :],
            (ssd_constants.NUM_CROP_PASSES, 1))

    def per_box(column):
        return tf.tile(column, (1, num_boxes))

    # The random ops are created in the same order as before: width, height,
    # left, top, then the min-IoU shuffle.
    width = rand_vec(0.3, 1)
    height = rand_vec(0.3, 1)
    left = rand_vec(0, 1 - width)
    top = rand_vec(0, 1 - height)
    right = left + width
    bottom = top + height

    ltrb = tf.concat([left, top, right, bottom], axis=1)

    min_iou = tf.random_shuffle(ssd_constants.CROP_MIN_IOU_CHOICES)[0]
    ious = calc_iou_tensor(ltrb, boxes)

    # discard any bboxes whose center not in the cropped image
    xc = tiled_center(0)
    yc = tiled_center(1)
    inside_tests = [
        tf.greater(xc, per_box(left)),
        tf.less(xc, per_box(right)),
        tf.greater(yc, per_box(top)),
        tf.less(yc, per_box(bottom)),
    ]
    masks = tf.reduce_all(tf.stack(inside_tests, axis=2), axis=2)

    # Checks of whether a crop is valid.
    valid_aspect = tf.logical_and(tf.less(height / width, 2),
                                  tf.less(width / height, 2))
    valid_ious = tf.reduce_all(tf.greater(ious, min_iou), axis=1,
                               keepdims=True)
    valid_masks = tf.reduce_any(masks, axis=1, keepdims=True)
    all_checks = tf.concat([valid_aspect, valid_ious, valid_masks], axis=1)
    valid_all = tf.cast(tf.reduce_all(all_checks, axis=1), tf.int32)

    # One indexed, as zero is needed for the case of no matches.
    index = tf.range(1, 1 + num_passes, dtype=tf.int32)

    # Either one-hot, or zeros if there is no valid crop.
    selection = tf.equal(tf.reduce_max(index * valid_all), index)
    use_crop = tf.reduce_any(selection)

    selection_weights = tf.tile(
        tf.cast(selection, tf.float32)[:, tf.newaxis], (1, 4))
    output_ltrb = tf.reduce_sum(tf.multiply(ltrb, selection_weights), axis=0)

    selection_per_box = tf.tile(selection[:, tf.newaxis], (1, num_boxes))
    output_masks = tf.reduce_any(tf.logical_and(masks, selection_per_box),
                                 axis=0)

    return use_crop, output_ltrb, output_masks
import tensorflow as tf

# Matrix: a 2 x 10 tensor of zeros.
a = tf.zeros([2, 10])
print(a)

# Reduce functions applied to the integers 1..10.
b = tf.constant(list(range(1, 11)))
for reduce_fn in (tf.reduce_sum, tf.reduce_mean, tf.reduce_max, tf.reduce_min):
    print(reduce_fn(b))

# Broadcasting: the scalar is added to every element.
c = b + 5
print(c)

# Matrix operation: (2 x 3) times (3 x 2).
a = tf.constant([[1, 2, 3],
                 [4, 5, 6]])
b = tf.constant([[10, 11],
                 [21, 22],
                 [30, 31]])
c = tf.matmul(a, b)
print(c)
def _sample_max(values):
    """Max over sample indices. In this module this is always [0].

    Uses ``axis`` rather than the deprecated ``reduction_indices`` alias,
    which the TF2 ``reduce_max`` signature no longer has.
    """
    return tf.reduce_max(values, axis=[0])
def softmax(self, target, axis, name=None):
    """Softmax of ``target`` along ``axis``.

    The maximum along ``axis`` is subtracted before exponentiating.
    ``name`` is given to the final division op.
    """
    shifted = target - tf.reduce_max(target, axis, keepdims=True)
    exponentials = tf.exp(shifted)
    partition = tf.reduce_sum(exponentials, axis, keepdims=True)
    return tf.div(exponentials, partition, name)
# NOTE(review): fragment of a larger training loop; the enclosing loops and
# the body of the final `if` are outside this excerpt.
state = state_next

# Train every `update_after_actions` frames, once the replay history holds
# more than one batch of transitions.
if frame % update_after_actions == 0 and len(
        done_history) > batch_size:
    # Chooses random samples from our history:
    indices = np.random.choice(range(len(done_history)),
                               size=batch_size)

    state_sample = np.array([state_history[i] for i in indices])
    state_next_sample = np.array(
        [state_next_history[i] for i in indices])
    action_sample = np.array([action_history[i] for i in indices])
    reward_sample = np.array([reward_history[i] for i in indices])
    done_sample = tf.convert_to_tensor(
        [float(done_history[i]) for i in indices])

    # Bootstrap target from the target network:
    # reward + gamma * max over actions of Q_target(next_state).
    future_rewards = model_target.predict(state_next_sample)
    updated_q_values = reward_sample + gamma * tf.reduce_max(
        future_rewards, axis=1)

    # For terminal transitions (done == 1) the target becomes -1.
    updated_q_values = updated_q_values * (1 - done_sample) - done_sample

    # One-hot mask selecting the Q-value of the action actually taken; the
    # depth 2 is the hard-coded number of actions.
    masks = tf.one_hot(action_sample, 2)

    with tf.GradientTape() as tape:
        q_values = model(state_sample)
        q_action = tf.reduce_sum(tf.multiply(q_values, masks), axis=1)
        loss = loss_function(updated_q_values, q_action)

    # Backpropagation
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))

# Updating target model with main model weights
if frame % update_target_model == 0:
## registration parameters image_loss_name = "ssd" learning_rate = 0.01 total_iter = int(1000) ## load image if not os.path.exists(DATA_PATH): raise ("Download the data using demo_data.py script") if not os.path.exists(FILE_PATH): raise ("Download the data using demo_data.py script") fid = h5py.File(FILE_PATH, "r") fixed_image = tf.cast(tf.expand_dims(fid["image"], axis=0), dtype=tf.float32) fixed_image = (fixed_image - tf.reduce_min(fixed_image)) / ( tf.reduce_max(fixed_image) - tf.reduce_min(fixed_image) ) # normalisation to [0,1] # generate a radomly-affine-transformed moving image fixed_image_size = fixed_image.shape transform_random = util.random_transform_generator(batch_size=1, scale=0.2) grid_ref = util.get_reference_grid(grid_size=fixed_image_size[1:4]) grid_random = util.warp_grid(grid_ref, transform_random) moving_image = util.resample(vol=fixed_image, loc=grid_random) # warp the labels to get ground-truth using the same random affine, for validation fixed_labels = tf.cast(tf.expand_dims(fid["label"], axis=0), dtype=tf.float32) moving_labels = tf.stack( [ util.resample(vol=fixed_labels[..., idx], loc=grid_random) for idx in range(fixed_labels.shape[4]) ],
plt.savefig('test_20_yfield_diff.png')

# Difference between channel 1 of test sample 20 and `zf`.
plt.figure()
input_z_field = np.squeeze(X_test[20, :, :, 1])
imshow_center(input_z_field - zf)
plt.title(
    "error between forwardly solved z-field from prediction and true input z-field"
)
plt.savefig('test_20_zfield_diff.png')

# ---- calculate final test loss ----
final_loss = custom_loss_rmse(test_labels_t2b, y_pred_ht2)
print('final RMSE loss on test set:', final_loss.numpy())

# RMSE divided by the mean label value.
label_mean = K.mean(test_labels_t2b)
NRMSE = final_loss / label_mean
print('final normalized RMSE loss (div mean) on the test set:', NRMSE.numpy())

# RMSE divided by the label range (max - min).
label_range = tf.reduce_max(test_labels_t2b) - tf.reduce_min(test_labels_t2b)
RMSE_range = final_loss / label_range
print('final normalized RMSE loss (div range) on the test set:',
      RMSE_range.numpy())

# RMSE divided by the inter-quartile range of the flattened labels.
test_arr = tf.keras.backend.flatten(test_labels_t2b).numpy()
IQR = stats.iqr(test_arr)
RMSE_IQR = final_loss / IQR
print('final normalized RMSE loss (div IQR) on the test set:',
      RMSE_IQR.numpy())

# Norm of the residual, and the same norm relative to the norm of the labels.
residual_norm = tf.norm(y_pred_ht2 - test_labels_t2b)
print('final norm of the difference tensor:', residual_norm.numpy())
Boll_NRMSE = residual_norm / tf.norm(test_labels_t2b)
print('final Bollman normalized RMSE loss on the test set:',
      Boll_NRMSE.numpy())

# ---- view final loss per battery ----
# ### Placeholders

# In[22]:

# Random inputs fed into the graph below.
x_data = np.random.randn(4, 3)
w_data = np.random.randn(3, 1)

with tf.Graph().as_default():
    x = tf.placeholder(tf.float32, shape=(4, 3))
    w = tf.placeholder(tf.float32, shape=(3, 1))
    b = tf.fill((4, 1), -1.)

    # max(x @ w + b)
    xw = tf.matmul(x, w)
    xwb = xw + b
    s = tf.reduce_max(xwb)

    with tf.Session() as sess:
        feed = {x: x_data, w: w_data}
        print("x_data: \n{}".format(x_data))
        print("w_data: \n{}".format(w_data))
        print("xw: \n", sess.run(xw, feed_dict=feed))
        print("xwb: \n", sess.run(xwb, feed_dict=feed))
        outs = sess.run(s, feed_dict=feed)

print("outs = {}".format(outs))
def fidnet_doRecon2D(model_weights, file, ss_file, max_points, outfile, f1180='y', shift='n'):
    """Reconstruct a 2D spectrum with the network, then write and plot it.

    Args:
        model_weights: Path passed to ``model.load_weights``.
        file: Input file read with ``ng.pipe.read``.
        ss_file: Sampling-schedule file passed to ``load_ss``.
        max_points: Requested size of the indirect dimension; capped at 256.
        outfile: Output path passed to ``ng.pipe.write``.
        f1180: ``'y'`` / ``'n'`` (any case) is turned into a bool; any other
            string is forwarded unchanged.
        shift: Same convention as ``f1180``.

    The process exits via ``sys.exit()`` when the input has more than 512
    points in the indirect dimension or does not match the sampling schedule.
    """
    f1180_key = f1180.lower()
    if f1180_key in ('y', 'n'):
        f1180 = f1180_key == 'y'
    shift_key = shift.lower()
    if shift_key in ('y', 'n'):
        shift = shift_key == 'y'

    dic, data = ng.pipe.read(file)
    model = build_model()
    model.load_weights(model_weights)
    ss = load_ss(ss_file, max_points)

    ind_points = data.shape[0]  # sampled points in indirect dim
    dir_points = data.shape[1]  # sampled points in direct dim

    if ind_points > 512:
        for message in (
                'the input spectrum contains too many sampled points',
                'the network can have a maximum of 256 complex points in the',
                'reconstructed spectra. Please reduce the size of the input',
                'aborting now...'):
            print(message)
        sys.exit()

    if ss.shape[0] != ind_points // 2:
        for message in (
                'there is a mis-match between the sampling schedule and number of recorded points',
                'in the indirect dimension. Please check the sampling schedule or your input spectrum',
                'may need to be transposed',
                'aborting now...'):
            print(message)
        sys.exit()
    else:
        print('number of recorded points in indirect dimension matches sampling schedule')
        print('proceeding with reconstruction...')

    if max_points > 256:
        print('the maximum size of the final spectrum is 256 complex points in')
        print('the indirect dimension. The output will be truncated at this point')
        max_points = 256

    data = expand_data(data, ss, max_points, dir_points)
    data = tf.convert_to_tensor(data)
    dl_dic = make_dl_dic(dic, max_points)

    # Normalise by the global maximum.
    dims = tf.shape(data).numpy()
    data = data / tf.reduce_max(data)
    Hpoints = dims[1]
    Npoints = dims[0]

    # Zero-pad axis 0 to 512, transpose, then add 3 rows of zeros at each
    # end of the new leading axis.
    data_samp = tf.pad(data, [[0, 512 - tf.shape(data)[0]], [0, 0]],
                       'Constant', constant_values=0.0)
    data_samp = tf.transpose(data_samp)
    data_samp = tf.pad(data_samp, [[3, 3], [0, 0]], 'Constant',
                       constant_values=0.0)

    # Sliding windows of 4 rows, each scaled by its largest absolute value.
    n_windows = Hpoints + 3
    scale = np.array([np.max(np.fabs(data_samp[start:start + 4, :]))
                      for start in range(n_windows)])
    windows = np.zeros((scale.shape[0], 4, tf.shape(data_samp)[1]))
    for start in range(scale.shape[0]):
        windows[start, :, :] = data_samp[start:start + 4, :]

    samp_av = tf.convert_to_tensor(windows)
    # Move the window axis last so the division broadcasts one scale per
    # window, then restore the layout the model is fed with.
    samp_av = tf.transpose(samp_av, perm=[1, 2, 0]) / scale
    samp_av = tf.transpose(samp_av, perm=[2, 1, 0])
    samp_av = tf.expand_dims(samp_av, axis=3)

    data = tf.expand_dims(data, axis=0)

    res = tf.convert_to_tensor(model.predict(samp_av)[0])
    res = rescale_dat(res, scale)
    res_keep = copy.deepcopy(res)
    res = get_average_results(res, Hpoints)
    res = res[:, :Npoints, :, 0]

    ft_options = dict(npoints1=Hpoints, npoints2=Npoints, f1180=f1180,
                      shift=shift)
    res_ft = ft_second(res, **ft_options)
    data_ft = ft_second(data, **ft_options)
    data_ft = data_ft / tf.reduce_max(data_ft)
    res_ft = res_ft / tf.reduce_max(res_ft)

    ng.pipe.write(outfile, dl_dic, res.numpy()[0], overwrite=True)

    # Left column: input (top) and its transform (bottom); right column: the
    # same for the reconstruction.
    ax_data = plt.subplot(2, 2, 1)
    ax_data_ft = plt.subplot(2, 2, 3)
    ax_res = plt.subplot(2, 2, 2)
    ax_res_ft = plt.subplot(2, 2, 4)

    plot_contour(ax_data, data)
    plot_contour(ax_data_ft, data_ft, invert=True)
    plot_contour(ax_res, res)
    plot_contour(ax_res_ft, res_ft, invert=True)
    plt.show()

    get_ind_spectra(res_keep, res_ft, Hpoints, Npoints, dl_dic, f1180=f1180,
                    shift=shift)
def softmax(x, axis=-1):
    """Softmax of ``x`` along ``axis``; also prints the result.

    The maximum along ``axis`` is subtracted before exponentiating.
    """
    shifted = x - tf.reduce_max(x, axis=axis, keepdims=True)
    exponentials = tf.exp(shifted)
    result = exponentials / tf.reduce_sum(exponentials, axis=axis,
                                          keepdims=True)
    print("softmax result \t", result)
    return result
def match_passage_with_question(passage_reps, question_reps, passage_mask, question_mask,
                                passage_lengths, question_lengths, context_lstm_dim, scope=None,
                                with_full_match=True, with_maxpool_match=True,
                                with_attentive_match=True, with_max_attentive_match=True,
                                is_training=True, options=None, dropout_rate=0, forward=True):
    """Build question-aware representations of the passage.

    Both inputs are first multiplied by their masks. The max and mean of the
    masked relevancy matrix over axis 2 are always produced; each enabled
    ``with_*`` flag adds one more matching feature. All features are
    concatenated on axis 2.

    Args:
        passage_reps, question_reps: Representations to match.
        passage_mask, question_mask: Masks applied to the representations and
            to the relevancy matrix.
        passage_lengths: Accepted but not used here.
        question_lengths: Used to pick the final question step when
            ``forward`` is true.
        context_lstm_dim: Feature size passed to the matching helpers.
        scope: Variable scope name; defaults to
            ``"match_passage_with_question"``.
        with_full_match, with_maxpool_match, with_attentive_match,
            with_max_attentive_match: Enable the corresponding feature.
        is_training: Forwarded to the matching helpers.
        options: Mapping; reads ``dropout_rate``, ``cosine_MP_dim``,
            ``att_type`` and ``att_dim`` depending on the enabled features.
        dropout_rate: Forwarded to ``calcuate_attention`` only; the
            multi-perspective matches use ``options['dropout_rate']``.
        forward: Direction flag selecting the last (true) or first (false)
            question step for the full match.

    Returns:
        Tuple ``(representation, dim)``: the concatenated tensor and the sum
        of the feature sizes reported for each part.
    """
    passage_reps = tf.multiply(passage_reps, tf.expand_dims(passage_mask, -1))
    question_reps = tf.multiply(question_reps,
                                tf.expand_dims(question_mask, -1))

    features = []
    dim = 0

    def _add_mp_match(other_reps, scope_name):
        # Multi-perspective match of the passage against `other_reps`;
        # records the feature and returns its size.
        rep, rep_dim = multi_perspective_match(
            context_lstm_dim, passage_reps, other_reps,
            is_training=is_training,
            dropout_rate=options['dropout_rate'],
            options=options, scope_name=scope_name)
        features.append(rep)
        return rep_dim

    with tf.variable_scope(scope or "match_passage_with_question"):
        relevancy_matrix = mask_relevancy_matrix(
            cal_relevancy_matrix(question_reps, passage_reps),
            question_mask, passage_mask)

        features.append(
            tf.reduce_max(relevancy_matrix, axis=2, keep_dims=True))
        features.append(
            tf.reduce_mean(relevancy_matrix, axis=2, keep_dims=True))
        dim += 2

        if with_full_match:
            if forward:
                question_full_rep = layer_utils.collect_final_step_of_lstm(
                    question_reps, question_lengths - 1)
            else:
                question_full_rep = question_reps[:, 0, :]

            passage_len = tf.shape(passage_reps)[1]
            # [batch_size, passage_len, feature_dim]
            question_full_rep = tf.tile(
                tf.expand_dims(question_full_rep, axis=1),
                [1, passage_len, 1])
            dim += _add_mp_match(question_full_rep, 'mp-match-full-match')

        if with_maxpool_match:
            cosine_dim = options['cosine_MP_dim']
            maxpooling_decomp_params = tf.get_variable(
                "maxpooling_matching_decomp",
                shape=[cosine_dim, context_lstm_dim], dtype=tf.float32)
            features.append(cal_maxpooling_matching(
                passage_reps, question_reps, maxpooling_decomp_params))
            dim += 2 * options['cosine_MP_dim']

        if with_attentive_match:
            atten_scores = layer_utils.calcuate_attention(
                passage_reps, question_reps,
                context_lstm_dim, context_lstm_dim,
                scope_name="attention",
                att_type=options['att_type'], att_dim=options['att_dim'],
                remove_diagnoal=False,
                mask1=passage_mask, mask2=question_mask,
                is_training=is_training, dropout_rate=dropout_rate)
            att_question_contexts = tf.matmul(atten_scores, question_reps)
            dim += _add_mp_match(att_question_contexts,
                                 'mp-match-att_question')

        if with_max_attentive_match:
            max_att = cal_max_question_representation(question_reps,
                                                      relevancy_matrix)
            dim += _add_mp_match(max_att, 'mp-match-max-att')

        all_question_aware_representatins = tf.concat(axis=2, values=features)
    return (all_question_aware_representatins, dim)
def beam_search(decoder, init_hidden_state, init_token, eos_token, beam_size,
                vocab_size, max_seq_len=MAX_SEQ_LEN):
    """Beam search.

    Keeps beam_size living sequences at each iteration, and beam_size
    completed sequences at each iteration. Completes when all living
    sequences have dropped far enough in probability that no living sequences
    have any chance of beating one of the known completed sequences, or if
    the search limit has been reached.

    If, at the end, an incomplete sequence with max_seq_len has higher
    probability than any complete sequence, then it will be ranked higher
    than the completed sequence.

    Args:
      decoder: Decoder module. Called as ``decoder(tokens, hidden)`` and
        expected to return ``(logits, new_hidden)``.
      init_hidden_state: A hidden state representing decoding context. Should
        have a batch dimension with size 1.
      init_token: Token to seed decoding with.
      eos_token: Token to compare against to see if sequence is ended.
      beam_size: beam size.
      vocab_size: vocab size.
      max_seq_len: Maximum seq len before stopping and returning what we have.

    Returns:
      Tuple of sequences, log probs.
      sequences: Tensor of shape [beam_size, max_seq_len]
      log_probs: Tensor of shape [beam_size]
    """
    init_logits, hidden_state = decoder(tf.constant([init_token]),
                                        init_hidden_state)
    start_logprobs = tf.nn.log_softmax(tf.squeeze(init_logits))

    # Seed the starting sequences by executing decoder once and taking top k.
    # [beam_size], [beam_size]
    alive_logprobs, alive_indices = tf.nn.top_k(start_logprobs, k=beam_size)
    # [beam_size, max_seq_len]
    alive_sequences = tf.concat([
        tf.expand_dims(alive_indices, 1),
        tf.zeros([beam_size, max_seq_len - 1], dtype=tf.int32)], axis=1)
    # [[beam_size, hidden_size], ...]
    # The size-1 batch of every state tensor is repeated for each beam entry.
    alive_hidden = tf.nest.map_structure(
        lambda s: tf.tile(s, [beam_size, 1]), hidden_state)

    # Seed finished sequences as the empty sequence, i.e. [<EOS>, 0, 0...]
    # and zeros everywhere else.
    # Mark all other sequences with logprob = -INF
    finished_sequences = eos_token * tf.one_hot(
        [0], beam_size * max_seq_len, dtype=tf.int32)
    finished_sequences = tf.reshape(finished_sequences,
                                    [beam_size, max_seq_len])
    finished_logprobs = tf.where(
        tf.equal(tf.one_hot(0, beam_size), 1),
        tf.tile([start_logprobs[eos_token]], [beam_size]),
        tf.tile([NEG_INF], [beam_size]))

    for i in tf.range(1, max_seq_len):
        # [beam_size, vocab_size], [[beam_size, hidden_size], ..]
        next_char_logits, hidden_state = decoder(alive_indices, alive_hidden)
        # Adding log probabilities is equivalent to multiplying probabilities.
        # [beam_size, vocab_size]
        cumulative_logprob = (tf.expand_dims(alive_logprobs, 1) +
                              tf.nn.log_softmax(next_char_logits))

        # Pad all the finished/alive sequences so that they maintain the same
        # shape with each iteration. (A limitation of AutoGraph-generated
        # tf.while_loops.)
        sequence_padding = tf.zeros([beam_size, max_seq_len - i - 1],
                                    dtype=tf.int32)

        # Gather sequences/log probs for finished sequences: every alive
        # prefix extended by <EOS> is a candidate.
        newly_finished_sequences = tf.concat([
            alive_sequences[:, :i],
            tf.tile([[eos_token]], [beam_size, 1]),
            sequence_padding], axis=1)
        newly_finished_logprobs = cumulative_logprob[:, eos_token]
        finished_sequences, finished_logprobs = get_best_sequences(
            beam_size, finished_sequences, finished_logprobs,
            newly_finished_sequences, newly_finished_logprobs)

        # Gather sequences/log probs for alive sequences
        chosen_sequences, alive_logprobs, alive_indices = get_best_alive(
            cumulative_logprob, eos_token, beam_size, vocab_size)
        new_sequence_history = tf.gather(alive_sequences, chosen_sequences)
        # [beam_size, max_seq_len]
        alive_sequences = tf.concat([
            new_sequence_history[:, :i],
            tf.expand_dims(alive_indices, 1),
            sequence_padding], axis=1)
        # The slice by the loop tensor `i` loses the static shape; restore it.
        alive_sequences.set_shape([beam_size, max_seq_len])
        # [[beam_size, hidden_size], ...]
        # Reorder the new decoder state to follow the surviving beam entries.
        alive_hidden = tf.nest.map_structure(
            lambda s: tf.gather(s, chosen_sequences),  # pylint: disable=cell-var-from-loop
            hidden_state)

        # Exit if all alive sequences are worse than any finished sequence.
        if tf.reduce_min(finished_logprobs) > tf.reduce_max(alive_logprobs):
            break

    # Execute one final collation, just in case any of the alive sequences
    # are higher in probability than any of the finished sequences.
    finished_sequences, finished_logprobs = get_best_sequences(
        beam_size, finished_sequences, finished_logprobs, alive_sequences,
        alive_logprobs)

    return finished_sequences, finished_logprobs
def _compute_one_image_loss(self, armpbbox_yx, armpbbox_hw, armpconf,
                            odmpbbox_yx, odmpbbox_hw, odmpconf,
                            abbox_y1x1, abbox_y2x2, abbox_yx, abbox_hw, ground_truth):
    """Compute the combined ARM + ODM loss for a single image.

    Anchors are matched to ground-truth boxes by IoU. For every ground-truth
    box the best-overlapping anchor is a positive; of the remaining anchors,
    those with max IoU > 0.5 are positives and those with max IoU < 0.4 are
    negatives. Negatives are mined with non-max suppression keyed on their ARM
    loss (at most 3x the number of positives), and negatives the ARM already
    rejects with probability >= 0.99 are dropped from the ODM loss.

    Args:
        armpbbox_yx, armpbbox_hw: ARM box regression outputs, indexed by anchor.
        armpconf: ARM classification logits per anchor (class 0 = positive,
            class 1 = negative, per the labels built below).
        odmpbbox_yx, odmpbbox_hw: ODM box regression outputs, indexed by anchor.
        odmpconf: ODM classification logits per anchor; the last class
            (``self.num_classes - 1``) is used as the negative label.
        abbox_y1x1, abbox_y2x2: anchor corner coordinates.
        abbox_yx, abbox_hw: anchor centers and sizes.
        ground_truth: rows of ``[y, x, h, w, class_id]``.
            NOTE(review): rows from the first-column argmin onward are dropped,
            which presumably removes padding rows -- confirm against the caller.

    Returns:
        Scalar tensor ``armloss + odmloss``.
    """
    # Keep only the rows that precede the row holding the minimum of column 0.
    slice_index = tf.argmin(ground_truth, axis=0)[0]
    ground_truth = tf.gather(ground_truth, tf.range(0, slice_index, dtype=tf.int64))

    gbbox_yx = ground_truth[..., 0:2]
    gbbox_hw = ground_truth[..., 2:4]
    gbbox_y1x1 = gbbox_yx - gbbox_hw / 2.
    gbbox_y2x2 = gbbox_yx + gbbox_hw / 2.
    class_id = tf.cast(ground_truth[..., 4:5], dtype=tf.int32)
    label = class_id

    # Broadcast anchors and ground truth against each other: [num_gt, num_anchor, 2].
    abbox_hwti = tf.reshape(abbox_hw, [1, -1, 2])
    abbox_y1x1ti = tf.reshape(abbox_y1x1, [1, -1, 2])
    abbox_y2x2ti = tf.reshape(abbox_y2x2, [1, -1, 2])
    gbbox_hwti = tf.reshape(gbbox_hw, [-1, 1, 2])
    gbbox_y1x1ti = tf.reshape(gbbox_y1x1, [-1, 1, 2])
    gbbox_y2x2ti = tf.reshape(gbbox_y2x2, [-1, 1, 2])
    ashape = tf.shape(abbox_hwti)
    gshape = tf.shape(gbbox_hwti)
    abbox_hwti = tf.tile(abbox_hwti, [gshape[0], 1, 1])
    abbox_y1x1ti = tf.tile(abbox_y1x1ti, [gshape[0], 1, 1])
    abbox_y2x2ti = tf.tile(abbox_y2x2ti, [gshape[0], 1, 1])
    gbbox_hwti = tf.tile(gbbox_hwti, [1, ashape[1], 1])
    gbbox_y1x1ti = tf.tile(gbbox_y1x1ti, [1, ashape[1], 1])
    gbbox_y2x2ti = tf.tile(gbbox_y2x2ti, [1, ashape[1], 1])

    # Pairwise IoU between every ground-truth box and every anchor.
    gaiou_y1x1ti = tf.maximum(abbox_y1x1ti, gbbox_y1x1ti)
    gaiou_y2x2ti = tf.minimum(abbox_y2x2ti, gbbox_y2x2ti)
    gaiou_area = tf.reduce_prod(tf.maximum(gaiou_y2x2ti - gaiou_y1x1ti, 0), axis=-1)
    aarea = tf.reduce_prod(abbox_hwti, axis=-1)
    garea = tf.reduce_prod(gbbox_hwti, axis=-1)
    gaiou_rate = gaiou_area / (aarea + garea - gaiou_area)

    # Best anchor for each ground-truth box.
    best_raindex = tf.argmax(gaiou_rate, axis=1)
    best_armpbbox_yx = tf.gather(armpbbox_yx, best_raindex)
    best_armpbbox_hw = tf.gather(armpbbox_hw, best_raindex)
    best_armpconf = tf.gather(armpconf, best_raindex)
    best_odmpbbox_yx = tf.gather(odmpbbox_yx, best_raindex)
    best_odmpbbox_hw = tf.gather(odmpbbox_hw, best_raindex)
    best_odmpconf = tf.gather(odmpconf, best_raindex)
    best_abbox_yx = tf.gather(abbox_yx, best_raindex)
    best_abbox_hw = tf.gather(abbox_hw, best_raindex)

    # Dense 0/1 mask over anchors marking the "best" ones. Indices are sorted
    # before they are used as sparse indices.
    bestmask, _ = tf.unique(best_raindex)
    bestmask = tf.contrib.framework.sort(bestmask)
    bestmask = tf.reshape(bestmask, [-1, 1])
    bestmask = tf.sparse.SparseTensor(
        tf.concat([bestmask, tf.zeros_like(bestmask)], axis=-1),
        # Squeeze only the trailing axis so that a single unique index still
        # yields a rank-1 values vector instead of a scalar.
        tf.squeeze(tf.ones_like(bestmask), axis=-1),
        dense_shape=[ashape[1], 1])
    bestmask = tf.reshape(tf.cast(tf.sparse.to_dense(bestmask), tf.float32), [-1])

    # Everything that is not a "best" anchor.
    othermask = 1. - bestmask
    othermask = othermask > 0.
    other_armpbbox_yx = tf.boolean_mask(armpbbox_yx, othermask)
    other_armpbbox_hw = tf.boolean_mask(armpbbox_hw, othermask)
    other_armpconf = tf.boolean_mask(armpconf, othermask)
    other_odmpbbox_yx = tf.boolean_mask(odmpbbox_yx, othermask)
    other_odmpbbox_hw = tf.boolean_mask(odmpbbox_hw, othermask)
    other_odmpconf = tf.boolean_mask(odmpconf, othermask)
    other_abbox_yx = tf.boolean_mask(abbox_yx, othermask)
    other_abbox_hw = tf.boolean_mask(abbox_hw, othermask)

    # Threshold the remaining anchors by their best IoU with any ground truth.
    agiou_rate = tf.transpose(gaiou_rate)
    other_agiou_rate = tf.boolean_mask(agiou_rate, othermask)
    max_agiou_rate = tf.reduce_max(other_agiou_rate, axis=1)
    pos_agiou_mask = max_agiou_rate > 0.5
    neg_agiou_mask = max_agiou_rate < 0.4
    rgindex = tf.argmax(other_agiou_rate, axis=1)
    pos_rgindex = tf.boolean_mask(rgindex, pos_agiou_mask)
    pos_armpbbox_yx = tf.boolean_mask(other_armpbbox_yx, pos_agiou_mask)
    pos_armpbbox_hw = tf.boolean_mask(other_armpbbox_hw, pos_agiou_mask)
    pos_armpconf = tf.boolean_mask(other_armpconf, pos_agiou_mask)
    pos_odmpbbox_yx = tf.boolean_mask(other_odmpbbox_yx, pos_agiou_mask)
    pos_odmpbbox_hw = tf.boolean_mask(other_odmpbbox_hw, pos_agiou_mask)
    pos_odmpconf = tf.boolean_mask(other_odmpconf, pos_agiou_mask)
    pos_abbox_yx = tf.boolean_mask(other_abbox_yx, pos_agiou_mask)
    pos_abbox_hw = tf.boolean_mask(other_abbox_hw, pos_agiou_mask)
    pos_odmlabel = tf.gather(label, pos_rgindex)
    pos_gbbox_yx = tf.gather(gbbox_yx, pos_rgindex)
    pos_gbbox_hw = tf.gather(gbbox_hw, pos_rgindex)

    neg_armpconf = tf.boolean_mask(other_armpconf, neg_agiou_mask)
    neg_armabbox_yx = tf.boolean_mask(other_abbox_yx, neg_agiou_mask)
    neg_armabbox_hw = tf.boolean_mask(other_abbox_hw, neg_agiou_mask)
    neg_armabbox_y1x1y2x2 = tf.concat(
        [neg_armabbox_yx - neg_armabbox_hw/2., neg_armabbox_yx + neg_armabbox_hw/2.],
        axis=-1)
    neg_odmpconf = tf.boolean_mask(other_odmpconf, neg_agiou_mask)

    # All positives = per-ground-truth best anchors + IoU-thresholded anchors.
    total_pos_armpbbox_yx = tf.concat([best_armpbbox_yx, pos_armpbbox_yx], axis=0)
    total_pos_armpbbox_hw = tf.concat([best_armpbbox_hw, pos_armpbbox_hw], axis=0)
    total_pos_armpconf = tf.concat([best_armpconf, pos_armpconf], axis=0)
    total_pos_odmpbbox_yx = tf.concat([best_odmpbbox_yx, pos_odmpbbox_yx], axis=0)
    total_pos_odmpbbox_hw = tf.concat([best_odmpbbox_hw, pos_odmpbbox_hw], axis=0)
    total_pos_odmpconf = tf.concat([best_odmpconf, pos_odmpconf], axis=0)
    total_pos_odmlabel = tf.concat([label, pos_odmlabel], axis=0)
    total_pos_gbbox_yx = tf.concat([gbbox_yx, pos_gbbox_yx], axis=0)
    total_pos_gbbox_hw = tf.concat([gbbox_hw, pos_gbbox_hw], axis=0)
    total_pos_abbox_yx = tf.concat([best_abbox_yx, pos_abbox_yx], axis=0)
    total_pos_abbox_hw = tf.concat([best_abbox_hw, pos_abbox_hw], axis=0)

    # Hard-negative mining for the ARM: keep at most 3 negatives per positive.
    num_pos = tf.shape(total_pos_odmlabel)[0]
    num_armneg = tf.shape(neg_armpconf)[0]
    chosen_num_armneg = tf.cond(num_armneg > 3*num_pos, lambda: 3*num_pos, lambda: num_armneg)
    neg_armclass_id = tf.constant([1])
    pos_armclass_id = tf.constant([0])
    neg_armlabel = tf.tile(neg_armclass_id, [num_armneg])
    pos_armlabel = tf.tile(pos_armclass_id, [num_pos])
    total_neg_armloss = tf.losses.sparse_softmax_cross_entropy(
        neg_armlabel, neg_armpconf, reduction=tf.losses.Reduction.NONE)
    selected_armindices = tf.image.non_max_suppression(
        neg_armabbox_y1x1y2x2,
        total_neg_armloss,
        chosen_num_armneg,
        iou_threshold=0.7
    )
    neg_armloss = tf.reduce_mean(tf.gather(total_neg_armloss, selected_armindices))

    chosen_neg_armpconf = tf.gather(neg_armpconf, selected_armindices)
    chosen_neg_odmpconf = tf.gather(neg_odmpconf, selected_armindices)
    # Negative anchor filtering: armpconf holds logits (it is fed to
    # sparse_softmax_cross_entropy above), so the 0.99 confidence threshold
    # must be applied to the softmax probability of the negative class.
    neg_odm_mask = tf.nn.softmax(chosen_neg_armpconf)[:, 1] < 0.99
    chosen_neg_odmpconf = tf.boolean_mask(chosen_neg_odmpconf, neg_odm_mask)
    chosen_num_odmneg = tf.shape(chosen_neg_odmpconf)[0]
    neg_odmclass_id = tf.constant([self.num_classes-1])
    neg_odmlabel = tf.tile(neg_odmclass_id, [chosen_num_odmneg])
    neg_odmloss = tf.losses.sparse_softmax_cross_entropy(
        neg_odmlabel, chosen_neg_odmpconf, reduction=tf.losses.Reduction.MEAN)

    # ARM positives: classification + box regression against the anchors.
    pos_armconf_loss = tf.losses.sparse_softmax_cross_entropy(
        pos_armlabel, total_pos_armpconf, reduction=tf.losses.Reduction.MEAN)
    pos_truth_armpbbox_yx = (total_pos_gbbox_yx - total_pos_abbox_yx) / total_pos_abbox_hw
    pos_truth_armpbbox_hw = tf.log(total_pos_gbbox_hw / total_pos_abbox_hw)
    pos_yx_armloss = tf.reduce_sum(
        self._smooth_l1_loss(total_pos_armpbbox_yx - pos_truth_armpbbox_yx), axis=-1)
    pos_hw_armloss = tf.reduce_sum(
        self._smooth_l1_loss(total_pos_armpbbox_hw - pos_truth_armpbbox_hw), axis=-1)
    pos_coord_armloss = tf.reduce_mean(pos_yx_armloss + pos_hw_armloss)

    # ODM positives: regress against the ARM-refined anchors.
    arm_yx = total_pos_armpbbox_yx * total_pos_abbox_hw + total_pos_abbox_yx
    arm_hw = tf.exp(total_pos_armpbbox_hw) * total_pos_abbox_hw
    pos_odmconf_loss = tf.losses.sparse_softmax_cross_entropy(
        total_pos_odmlabel, total_pos_odmpconf, reduction=tf.losses.Reduction.MEAN)
    pos_truth_odmpbbox_yx = (total_pos_gbbox_yx - arm_yx) / arm_hw
    pos_truth_odmpbbox_hw = tf.log(total_pos_gbbox_hw / arm_hw)
    pos_yx_odmloss = tf.reduce_sum(
        self._smooth_l1_loss(total_pos_odmpbbox_yx - pos_truth_odmpbbox_yx), axis=-1)
    pos_hw_odmloss = tf.reduce_sum(
        self._smooth_l1_loss(total_pos_odmpbbox_hw - pos_truth_odmpbbox_hw), axis=-1)
    pos_coord_odmloss = tf.reduce_mean(pos_yx_odmloss + pos_hw_odmloss)

    armloss = neg_armloss + pos_armconf_loss + pos_coord_armloss
    odmloss = neg_odmloss + pos_odmconf_loss + pos_coord_odmloss
    return armloss + odmloss
def reduce_max(input):
    """Return ``tf.reduce_max(input)``, i.e. the maximum with no axis given."""
    maximum = tf.reduce_max(input)
    return maximum
def build_assembled_classification(classify_graph_def_paths,
                                   classification_class_index_map,
                                   input_output_tensor_prefix=""):
    """Assemble several frozen classifiers into one averaged-ensemble graph.

    A fresh graph is created with an encoded-image string placeholder. The
    decoded image is reshaped to a batch of 3-channel images and fed to every
    frozen classification graph. The softmax probabilities and the feature
    outputs of all members are averaged.

    Args:
        classify_graph_def_paths: iterable of frozen graph-def paths.
        classification_class_index_map: mapping whose values are wrapped in
            ``tf.constant`` and exposed in the outputs.
        input_output_tensor_prefix: prefix prepended to the names of the input
            placeholder and of the exported output tensors.

    Returns:
        ``(graph, inputs, outputs)`` where ``inputs`` and ``outputs`` are dicts
        of tensors keyed by ``classify_*`` names.
    """
    def _named(suffix):
        # All exported tensors share the caller-supplied name prefix.
        return input_output_tensor_prefix + suffix

    assembled_graph = tf.Graph()
    with assembled_graph.as_default():
        encoded_image = tf.placeholder(tf.string, name=_named("encoded_image"))
        decoded_image = tf.image.decode_image(encoded_image, channels=3)
        decoded_shape = tf.shape(decoded_image)
        batched_shape = tf.stack([-1, decoded_shape[-3], decoded_shape[-2], 3])
        input_image_tensor = tf.reshape(decoded_image, batched_shape,
                                        name=_named("input_images"))

        # Classification phase: run every member on the same input tensor.
        member_probabilities = []
        member_features = []
        for graph_def_path in classify_graph_def_paths:
            member_prefix = get_input_output_tensor_prefix(graph_def_path)
            _, _, member_outputs = load_frozen_classify_graph(
                graph_def_path,
                input_map={"input_images": input_image_tensor},
                input_output_tensor_prefix=member_prefix)
            member_probabilities.append(tf.nn.softmax(member_outputs["logits"]))
            member_features.append(member_outputs["features"])

        # Ensemble by averaging over the member axis.
        mean_probability = tf.reduce_mean(tf.stack(member_probabilities, axis=0), axis=0)
        mean_features = tf.reduce_mean(tf.stack(member_features, axis=0), axis=0)
        best_score = tf.reduce_max(mean_probability, axis=-1)
        best_class = tf.argmax(mean_probability, axis=-1)

        # Named export tensors. The "logits" tensor is only reachable by name.
        tf.identity(tf.log(1e-9 + mean_probability), name=_named("logits"))
        c_classes = tf.identity(best_class, name=_named("predict"))
        c_scores = tf.identity(best_score, name=_named("scores"))
        c_features = tf.identity(mean_features, name=_named("features"))

        # Add class index map.
        fake_input = tf.placeholder(tf.int32)
        class_index_constants = {}
        for class_key, class_index in classification_class_index_map.items():
            class_index_constants[class_key] = tf.constant(class_index)

        inputs = {
            "classify_encoded_image": encoded_image,
            "classify_image_tensor": input_image_tensor,
            "fake_input": fake_input,
        }
        outputs = {
            "classify_scores": c_scores,
            "classify_classes": c_classes,
            "classify_features": c_features,
            "classify_class_index_map": class_index_constants,
        }
    return assembled_graph, inputs, outputs
def max_out(inputs, outputs):
    """Return the element-wise maximum of ``inputs`` and ``outputs``.

    The two values are packed along a new leading axis and reduced over it.
    """
    candidates = [inputs, outputs]
    return tf.reduce_max(candidates, axis=0)
def CNN_model(input_x, input_ys, sent_length, category_index, dropout_keep_prob):
    """Build the two-level (word-level, then sentence-level) CNN training graph.

    The first level convolves over the words of every sentence and max-pools
    to one vector per sentence. That vector is concatenated with a category
    embedding, and each task gets an auxiliary per-sentence classifier whose
    masked loss is added with weight ``HP.lambda_regularizer_strength``. The
    second level convolves over the sentence vectors of a document and feeds
    one classifier per task.

    Args:
        input_x: word embeddings, reshaped here to
            ``[-1, HP.max_sentence_length, HP.embedding_size]``.
        input_ys: sequence of per-task label tensors, each
            ``[n_batch, HP.num_classes]`` per the inline shape notes.
        sent_length: per-document count of valid sentences, used to mask the
            sentence-level losses.
        category_index: indices into the category embedding table.
        dropout_keep_prob: keep probability passed to ``tf.nn.dropout``.

    Returns:
        Tuple ``(optimize, scores_soft_max_list, scores_sentence_soft_max_list,
        gradients_sentence_list, patient_vector)``.
    """
    # Category lookup.
    target_embeddings = tf.get_variable(
        name="target_embeddings",
        dtype=tf.float32,
        shape=[HP.n_category, HP.dim_category])
    embedded_category = tf.nn.embedding_lookup(
        target_embeddings, category_index,
        name="target_embeddings")  # [n_batch, n_doc, dim_category]

    # ---- Word-level CNN: treat every sentence as one example. ----
    x = tf.reshape(input_x, [-1, HP.max_sentence_length, HP.embedding_size])
    pooled_outputs = []
    for filter_size in HP.filter_sizes:
        with tf.name_scope("conv-maxpool-%s" % filter_size):
            # Convolution layer.
            filter_shape = [filter_size, HP.embedding_size, HP.num_filters]
            W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[HP.num_filters]), name="b")
            conv = tf.nn.conv1d(
                value=x,
                filters=W,
                stride=1,
                padding="VALID"
            )  # (n_batch*n_doc) * (n_seq - filter_size) * num_filters
            # Nonlinearity; shape unchanged.
            h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
            # Max-pool over the sequence axis.
            pooled = tf.reduce_max(h, axis=1)  # (n_batch*n_doc) * n_filter
            pooled_outputs.append(pooled)

    # Combine all the pooled features.
    num_filters_total = HP.num_filters * len(HP.filter_sizes)
    h_pool = tf.concat(pooled_outputs, 1)  # (n_batch*n_doc) * num_filters_total

    with tf.name_scope("dropout"):
        h_drop = tf.nn.dropout(h_pool, dropout_keep_prob)

    first_cnn_output = tf.reshape(
        h_drop, [-1, HP.max_document_length, num_filters_total])  # [n_batch, n_doc, n_filter]
    first_cnn_output = tf.concat(
        [first_cnn_output, embedded_category], axis=2)  # [n_batch, n_doc, n_filter + dim_category]
    h_drop = tf.reshape(
        first_cnn_output,
        [-1, (num_filters_total+HP.dim_category)])  # [(n_batch * n_doc), n_filter + dim_category]

    # ---- Auxiliary per-sentence loss on the concatenated sentence vectors. ----
    total_loss = 0
    scores_sentence_soft_max_list = []
    gradients_sentence_list = []
    for (M, input_y) in enumerate(input_ys):
        with tf.name_scope("task"+str(M)):
            W = tf.Variable(
                tf.truncated_normal(
                    [(num_filters_total+HP.dim_category), HP.num_classes], stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[HP.num_classes]), name="b")
            scores_sentence = tf.nn.xw_plus_b(h_drop, W, b)  # [(n_batch * n_doc), num_classes]

            # Replicate the document label for every sentence so that labels
            # line up with the per-sentence scores.
            y = tf.tile(input_y, [1, HP.max_document_length])  # [n_batch, (num_classes*n_doc)]
            y = tf.reshape(y, [-1, HP.num_classes])  # [(n_batch*n_doc), num_classes]

            scores_sentence_soft_max = tf.nn.softmax(scores_sentence)
            scores_sentence_soft_max = tf.reshape(
                scores_sentence_soft_max,
                [-1, HP.max_document_length, HP.num_classes])  # [n_batch, n_doc, num_classes]
            scores_sentence_soft_max_list.append(scores_sentence_soft_max)

            sentence_losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=scores_sentence, labels=y)  # [(n_batch * n_doc)]

            # tf.gradients returns a one-element list; tf.abs packs it into a
            # tensor and the reshape below folds the leading axis away.
            gradients_sentence = tf.abs(tf.gradients(sentence_losses, [h_drop]))
            gradients_sentence = tf.reshape(
                gradients_sentence,
                [-1, HP.max_document_length,
                 num_filters_total+HP.dim_category])  # [n_batch, n_doc, n_filter + dim_category]
            gradients_sentence_list.append(gradients_sentence)

            sentence_losses = tf.reshape(
                sentence_losses, [-1, HP.max_document_length])  # [n_batch, n_doc]
            # The mask must be as wide as the padded loss matrix. Without
            # maxlen its width is max(sent_length), and boolean_mask would
            # pick misaligned entries whenever the longest document in the
            # batch is shorter than HP.max_document_length.
            mask = tf.sequence_mask(sent_length, maxlen=HP.max_document_length)
            sentence_losses = tf.boolean_mask(sentence_losses, mask)
            sentence_losses_avg = tf.reduce_mean(sentence_losses)
            total_loss += sentence_losses_avg * HP.lambda_regularizer_strength

    # ---- Sentence-level CNN over the sentence vectors of each document. ----
    filter_shape = [HP.document_filter_size,
                    (num_filters_total + HP.dim_category),
                    HP.document_num_filters]
    W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
    b = tf.Variable(tf.constant(0.1, shape=[HP.document_num_filters]), name="b")
    conv = tf.nn.conv1d(
        value=first_cnn_output,
        filters=W,
        stride=1,
        padding="VALID"
    )  # n_batch * (n_max_doc - filter + 1) * doc_num_filters
    h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
    pooled_second = tf.reduce_max(h, axis=1)  # n_batch * document_num_filters
    patient_vector = pooled_second

    with tf.name_scope("dropout"):
        pooled_second_drop = tf.nn.dropout(pooled_second, dropout_keep_prob)

    scores_soft_max_list = []
    for (M, input_y) in enumerate(input_ys):
        with tf.name_scope("task"+str(M)):
            W = tf.Variable(
                tf.truncated_normal([HP.document_num_filters, HP.num_classes], stddev=0.1),
                name="W")
            b = tf.Variable(tf.constant(0.1, shape=[HP.num_classes]), name="b")
            scores = tf.nn.xw_plus_b(pooled_second_drop, W, b)  # [n_batch, num_classes]
            scores_soft_max = tf.nn.softmax(scores)
            scores_soft_max_list.append(scores_soft_max)
            losses = tf.nn.softmax_cross_entropy_with_logits(
                logits=scores, labels=input_y)  # [n_batch]
            loss_avg = tf.reduce_mean(losses)
            total_loss += loss_avg

    # Optimize the summed document-level and weighted sentence-level losses.
    optimizer = tf.train.AdamOptimizer(learning_rate=HP.learning_rate)
    optimize = optimizer.minimize(total_loss)

    scores_soft_max_list = tf.stack(
        scores_soft_max_list, axis=0)  # [n_task, n_batch, num_classes]
    scores_sentence_soft_max_list = tf.stack(scores_sentence_soft_max_list, axis=0)
    return (optimize, scores_soft_max_list, scores_sentence_soft_max_list,
            gradients_sentence_list, patient_vector)
def update_pixel_locations_given_deformed_meshgrid(pixel_locations,
                                                   original_meshgrid,
                                                   deformed_meshgrid):
    """Map pixel locations into the coordinates of a deformed meshgrid.

    Every valid cell of ``deformed_meshgrid`` stores the (1-based) original
    y, x location it came from. A lookup table indexed by flattened original
    location is scattered with the deformed-grid position of each such cell
    and then gathered at the query locations. Queries outside the original
    grid, and originals that appear nowhere in the deformed grid, yield -1.

    Args:
      pixel_locations: A tf.int32 tensor of shape [N, 2] with y, x pixel
        locations.
      original_meshgrid: A tf.int32 tensor of size [height, width, 2] with
        y, x pixel locations. The assumption is that meshgrid values start
        from 1.
      deformed_meshgrid: A tf.int32 tensor of size
        [deformed_height, deformed_width, 2] with y, x pixel locations.
        Invalid positions have values less or equal to 0.

    Returns:
      A tf.int32 tensor of shape [N, 2] with the updated y, x pixel locations.
    """
    # Table extents: one more than the largest 1-based coordinate, so that
    # flat index 0 is free to serve as the "invalid" slot.
    table_rows = tf.reduce_max(original_meshgrid[:, :, 0]) + 1
    table_cols = tf.reduce_max(original_meshgrid[:, :, 1]) + 1

    # Flat table index of each query (shifted to 1-based); out-of-bounds
    # queries are zeroed so they land on the invalid slot.
    flat_query = (pixel_locations[:, 0] + 1) * table_cols + (
        pixel_locations[:, 1] + 1)
    y_in_bounds = tf.logical_and(
        tf.greater_equal(pixel_locations[:, 0], 0),
        tf.less(pixel_locations[:, 0], tf.shape(original_meshgrid)[0]))
    x_in_bounds = tf.logical_and(
        tf.greater_equal(pixel_locations[:, 1], 0),
        tf.less(pixel_locations[:, 1], tf.shape(original_meshgrid)[1]))
    in_bounds = tf.logical_and(y_in_bounds, x_in_bounds)
    flat_query = flat_query * tf.cast(in_bounds, dtype=flat_query.dtype)

    # Deformed cells whose y and x entries are both positive are valid.
    cell_is_valid = tf.reshape(
        tf.reduce_all(tf.greater(deformed_meshgrid, 0), axis=2), [-1])

    # 0-based y, x position of every valid cell inside the deformed grid.
    col_grid, row_grid = tf.meshgrid(
        tf.range(tf.shape(deformed_meshgrid)[1]),
        tf.range(tf.shape(deformed_meshgrid)[0]))
    deformed_positions = tf.stack([row_grid, col_grid], axis=2)
    deformed_positions = tf.boolean_mask(
        tf.reshape(deformed_positions, [-1, 2]), cell_is_valid)

    # Flat table index of the original location stored in each valid cell.
    flat_source = (deformed_meshgrid[:, :, 0] * table_cols +
                   deformed_meshgrid[:, :, 1])
    flat_source = tf.boolean_mask(tf.reshape(flat_source, [-1]), cell_is_valid)
    # NOTE(review): this masked copy is not read again below.
    deformed_meshgrid = tf.boolean_mask(
        tf.reshape(deformed_meshgrid, [-1, 2]), cell_is_valid)

    # Scatter y into column 0 and x into column 1 of the lookup table.
    # Values are stored +1 so untouched rows (zeros) become -1 afterwards.
    y_slots = tf.stack([flat_source, tf.zeros_like(flat_source)], axis=1)
    x_slots = tf.stack([flat_source, tf.ones_like(flat_source)], axis=1)
    scatter_indices = tf.concat([y_slots, x_slots], axis=0)
    scatter_values = tf.concat(
        [deformed_positions[:, 0], deformed_positions[:, 1]], axis=0) + 1
    lookup_table = tf.scatter_nd(
        indices=tf.cast(scatter_indices, dtype=tf.int64),
        updates=scatter_values,
        shape=[table_rows * table_cols, 2])
    lookup_table = lookup_table - 1
    return tf.gather(lookup_table, flat_query)
def attention_peak_score(att, mel_mask):
    """Return the mean, over the last axis, of the masked attention peaks.

    The peak is the maximum of ``att`` along axis 3. It is multiplied by
    ``mel_mask`` (cast to float32) before averaging, so masked-out positions
    contribute zeros to the mean rather than being excluded from it.
    """
    peaks = tf.reduce_max(att, axis=3)  # [N, n_heads, mel_dim]
    float_mask = tf.cast(mel_mask, tf.float32)
    masked_mean = tf.reduce_mean(peaks * float_mask, axis=-1)
    return tf.cast(masked_mean, tf.float32)
def robust_norm(x):
    """Compute the norm of ``x`` along axis 2 with max-abs rescaling.

    ``x`` is offset by 1e-8, divided by its largest absolute value along
    axis 2, normed, and the result is multiplied back by that scale.
    """
    shifted = x + 1e-8
    scale = tf.reduce_max(tf.abs(shifted), axis=2, keep_dims=True)
    scale_flat = tf.squeeze(scale, [2])
    rescaled_norm = tf.norm(shifted / scale, axis=2)
    return scale_flat * rescaled_norm
def __init__(self, config, batch, word_mat=None, char_mat=None, trainable=True, opt=True, demo=False, graph=None):
    """Build the model graph: inputs, masks, forward pass and optional train op.

    Args:
        config: configuration object; the attributes read here include the
            length limits, ``batch_size``, ``char_limit``, ``learning_rate``
            and ``grad_clip``.
        batch: iterator whose ``get_next()`` yields
            ``(c, q, ch, qh, y1, y2, qa_id)``; unused when ``demo`` is true.
        word_mat: initial word embedding matrix (kept non-trainable).
        char_mat: initial character embedding matrix.
        trainable: when true, create the learning-rate schedule, the Adam
            optimizer and ``self.train_op``.
        opt: when true, slice all inputs down to the longest sequence in the
            batch; otherwise use the configured limits.
        demo: when true, feed inputs through placeholders with batch size 1.
        graph: graph to build into; a new ``tf.Graph`` is created if ``None``.
    """
    self.config = config
    self.demo = demo
    self.graph = tf.Graph() if graph is None else graph
    with self.graph.as_default():
        self.global_step = tf.get_variable(
            'global_step', shape=[], dtype=tf.int32,
            initializer=tf.constant_initializer(0), trainable=False)
        self.dropout = tf.placeholder_with_default(0.0, (), name="dropout")

        if not self.demo:
            (self.c, self.q, self.ch, self.qh,
             self.y1, self.y2, self.qa_id) = batch.get_next()
        else:
            self.c = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "context")
            self.q = tf.placeholder(
                tf.int32, [None, config.test_ques_limit], "question")
            self.ch = tf.placeholder(
                tf.int32, [None, config.test_para_limit, config.char_limit],
                "context_char")
            self.qh = tf.placeholder(
                tf.int32, [None, config.test_ques_limit, config.char_limit],
                "question_char")
            self.y1 = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "answer_index1")
            self.y2 = tf.placeholder(
                tf.int32, [None, config.test_para_limit], "answer_index2")

        word_init = tf.constant(word_mat, dtype=tf.float32)
        self.word_mat = tf.get_variable(
            "word_mat", initializer=word_init, trainable=False)
        char_init = tf.constant(char_mat, dtype=tf.float32)
        self.char_mat = tf.get_variable("char_mat", initializer=char_init)

        # Token id 0 casts to False, so the boolean cast is the padding mask.
        self.c_mask = tf.cast(self.c, tf.bool)
        self.q_mask = tf.cast(self.q, tf.bool)
        self.c_len = tf.reduce_sum(tf.cast(self.c_mask, tf.int32), axis=1)
        self.q_len = tf.reduce_sum(tf.cast(self.q_mask, tf.int32), axis=1)

        if not opt:
            self.c_maxlen, self.q_maxlen = config.para_limit, config.ques_limit
        else:
            # Trim every input to the longest sequence in the batch to save
            # resources.
            rows = 1 if self.demo else config.batch_size
            chars = config.char_limit
            self.c_maxlen = tf.reduce_max(self.c_len)
            self.q_maxlen = tf.reduce_max(self.q_len)
            # tf.slice(input, begin, size) extracts a slice from a tensor.
            self.c = tf.slice(self.c, [0, 0], [rows, self.c_maxlen])
            self.q = tf.slice(self.q, [0, 0], [rows, self.q_maxlen])
            self.c_mask = tf.slice(self.c_mask, [0, 0], [rows, self.c_maxlen])
            self.q_mask = tf.slice(self.q_mask, [0, 0], [rows, self.q_maxlen])
            self.ch = tf.slice(self.ch, [0, 0, 0], [rows, self.c_maxlen, chars])
            self.qh = tf.slice(self.qh, [0, 0, 0], [rows, self.q_maxlen, chars])
            self.y1 = tf.slice(self.y1, [0, 0], [rows, self.c_maxlen])
            self.y2 = tf.slice(self.y2, [0, 0], [rows, self.c_maxlen])

        # Number of non-zero characters per word, flattened over all words.
        ch_nonzero = tf.cast(tf.cast(self.ch, tf.bool), tf.int32)
        self.ch_len = tf.reshape(tf.reduce_sum(ch_nonzero, axis=2), [-1])
        qh_nonzero = tf.cast(tf.cast(self.qh, tf.bool), tf.int32)
        self.qh_len = tf.reshape(tf.reduce_sum(qh_nonzero, axis=2), [-1])

        self.forward()
        total_params()

        if trainable:
            # Logarithmic warm-up that reaches 0.001 at step 998, capped at
            # the configured learning rate.
            warmup_slope = 0.001 / tf.log(999.)
            step_plus_one = tf.cast(self.global_step, tf.float32) + 1
            self.lr = tf.minimum(
                config.learning_rate, warmup_slope * tf.log(step_plus_one))
            # Typical values quoted in deep-learning notes for beta1, beta2
            # and epsilon are 0.9, 0.999 and 1e-8.
            self.opt = tf.train.AdamOptimizer(
                learning_rate=self.lr, beta1=0.8, beta2=0.999, epsilon=1e-7)
            grads_and_vars = self.opt.compute_gradients(self.loss)
            gradients, variables = zip(*grads_and_vars)
            clipped, _ = tf.clip_by_global_norm(gradients, config.grad_clip)
            self.train_op = self.opt.apply_gradients(
                zip(clipped, variables), global_step=self.global_step)
def build_train(make_obs_ph, q_func, num_actions, optimizer, grad_norm_clipping=None, gamma=1.0,
                double_q=True, scope="deepq", reuse=None, param_noise=False, param_noise_filter_func=None):
    """Create the act / train / update-target functions for deep Q-learning.

    Parameters
    ----------
    make_obs_ph: str -> tf.placeholder or TfInput
        takes a name and creates an input placeholder with that name
    q_func: (tf.Variable, int, str, bool) -> tf.Variable
        the model, called as ``q_func(observation_in, num_actions, scope, reuse)``;
        returns a tensor of shape (batch_size, num_actions) with the value of
        every action.
    num_actions: int
        number of actions
    optimizer: tf.train.Optimizer
        optimizer to use for the Q-learning objective.
    grad_norm_clipping: float or None
        clip each gradient's norm to this value. If None no clipping is
        performed.
    gamma: float
        discount rate.
    double_q: bool
        if true use Double Q Learning (https://arxiv.org/abs/1509.06461).
    scope: str or VariableScope
        optional scope for variable_scope.
    reuse: bool or None
        whether or not the variables should be reused. To be able to reuse,
        the scope must be given.
    param_noise: bool
        whether or not to use parameter space noise
        (https://arxiv.org/abs/1706.01905)
    param_noise_filter_func: tf.Variable -> bool
        decides whether or not a variable should be perturbed. Only used when
        param_noise is True.

    Returns
    -------
    act: function
        selects an action given an observation.
    train: function
        takes (obs_t, action, reward, obs_tp1, done, weight), runs one
        optimization step on the Bellman error and returns the TD error.
    update_target: () -> ()
        copies the parameters of the optimized Q function to the target Q
        function.
    debug: {str: function}
        debugging helpers; currently ``{'q_values': ...}``.
    """
    if not param_noise:
        act_f = build_act(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse)
    else:
        act_f = build_act_with_param_noise(make_obs_ph, q_func, num_actions, scope=scope, reuse=reuse,
                                           param_noise_filter_func=param_noise_filter_func)

    with tf.variable_scope(scope, reuse=reuse):
        # Placeholders for one batch of transitions.
        obs_t_input = make_obs_ph("obs_t")
        act_t_ph = tf.placeholder(tf.int32, [None], name="action")
        rew_t_ph = tf.placeholder(tf.float32, [None], name="reward")
        obs_tp1_input = make_obs_ph("obs_tp1")
        done_mask_ph = tf.placeholder(tf.float32, [None], name="done")
        importance_weights_ph = tf.placeholder(tf.float32, [None], name="weight")

        # Online network; its parameters were already created by act.
        q_t = q_func(obs_t_input.get(), num_actions, scope="q_func", reuse=True)
        q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/q_func")

        # Target network.
        q_tp1 = q_func(obs_tp1_input.get(), num_actions, scope="target_q_func")
        target_q_func_vars = tf.get_collection(
            tf.GraphKeys.GLOBAL_VARIABLES,
            scope=tf.get_variable_scope().name + "/target_q_func")

        # Q value of the action that was actually taken at time t.
        taken_action_one_hot = tf.one_hot(act_t_ph, num_actions)
        q_t_selected = tf.reduce_sum(q_t * taken_action_one_hot, 1)

        # Estimate of the best achievable value from the state at t + 1.
        if not double_q:
            q_tp1_best = tf.reduce_max(q_tp1, 1)
        else:
            # Double Q: choose the action with the online net, evaluate it
            # with the target net.
            q_tp1_online = q_func(obs_tp1_input.get(), num_actions, scope="q_func", reuse=True)
            greedy_action = tf.argmax(q_tp1_online, 1)
            q_tp1_best = tf.reduce_sum(q_tp1 * tf.one_hot(greedy_action, num_actions), 1)
        q_tp1_best_masked = (1.0 - done_mask_ph) * q_tp1_best

        # Right-hand side of the Bellman equation.
        q_t_selected_target = rew_t_ph + gamma * q_tp1_best_masked

        # Huber loss of the TD error, weighted by the importance weights.
        td_error = q_t_selected - tf.stop_gradient(q_t_selected_target)
        errors = U.huber_loss(td_error)
        weighted_error = tf.reduce_mean(importance_weights_ph * errors)

        # Optimization op, with optional per-gradient norm clipping.
        if grad_norm_clipping is None:
            optimize_expr = optimizer.minimize(weighted_error, var_list=q_func_vars)
        else:
            raw_gradients = optimizer.compute_gradients(weighted_error, var_list=q_func_vars)
            clipped_gradients = [
                (grad, var) if grad is None
                else (tf.clip_by_norm(grad, grad_norm_clipping), var)
                for grad, var in raw_gradients
            ]
            optimize_expr = optimizer.apply_gradients(clipped_gradients)

        # Called periodically to copy the online network into the target
        # network; variables are paired up by sorted name.
        by_name = lambda v: v.name
        paired_vars = zip(sorted(q_func_vars, key=by_name),
                          sorted(target_q_func_vars, key=by_name))
        copy_ops = [target_var.assign(online_var) for online_var, target_var in paired_vars]
        update_target_expr = tf.group(*copy_ops)

        # Callable wrappers.
        train = U.function(
            inputs=[
                obs_t_input,
                act_t_ph,
                rew_t_ph,
                obs_tp1_input,
                done_mask_ph,
                importance_weights_ph
            ],
            outputs=td_error,
            updates=[optimize_expr]
        )
        update_target = U.function([], [], updates=[update_target_expr])
        q_values = U.function([obs_t_input], q_t)

        return act_f, train, update_target, {'q_values': q_values}
def forward(self):
    """Build the forward graph, the loss and the EMA bookkeeping.

    Layers, in order: character + word input embedding with a highway
    network, a shared embedding encoder for context and question,
    context-query attention, three passes through a shared model encoder,
    and start/end pointer outputs.

    Sets ``self.c2q``, ``self.q2c``, ``self.enc``, ``self.logits``,
    ``self.yp1``, ``self.yp2`` and ``self.loss``. When ``config.decay`` is
    not ``None`` it also sets ``self.var_ema``, ``self.shadow_vars``,
    ``self.global_vars`` and ``self.assign_vars``.
    """
    config = self.config
    N, PL, QL, CL, d, dc, nh = (
        config.batch_size if not self.demo else 1,
        self.c_maxlen, self.q_maxlen, config.char_limit,
        config.hidden, config.char_dim, config.num_heads)

    with tf.variable_scope("Input_Embedding_Layer"):
        ch_emb = tf.reshape(
            tf.nn.embedding_lookup(self.char_mat, self.ch), [N * PL, CL, dc])
        qh_emb = tf.reshape(
            tf.nn.embedding_lookup(self.char_mat, self.qh), [N * QL, CL, dc])
        ch_emb = tf.nn.dropout(ch_emb, 1.0 - 0.5 * self.dropout)
        qh_emb = tf.nn.dropout(qh_emb, 1.0 - 0.5 * self.dropout)

        # BiDAF-style conv-highway encoder; question reuses the context weights.
        ch_emb = conv(ch_emb, d, bias=True, activation=tf.nn.relu,
                      kernel_size=5, name="char_conv", reuse=None)
        qh_emb = conv(qh_emb, d, bias=True, activation=tf.nn.relu,
                      kernel_size=5, name="char_conv", reuse=True)

        # Max over the character axis gives one vector per word.
        ch_emb = tf.reduce_max(ch_emb, axis=1)
        qh_emb = tf.reduce_max(qh_emb, axis=1)

        ch_emb = tf.reshape(ch_emb, [N, PL, ch_emb.shape[-1]])
        qh_emb = tf.reshape(qh_emb, [N, QL, ch_emb.shape[-1]])

        c_emb = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.c), 1.0 - self.dropout)
        q_emb = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.q), 1.0 - self.dropout)

        c_emb = tf.concat([c_emb, ch_emb], axis=2)
        q_emb = tf.concat([q_emb, qh_emb], axis=2)

        c_emb = highway(c_emb, size=d, scope="highway",
                        dropout=self.dropout, reuse=None)
        q_emb = highway(q_emb, size=d, scope="highway",
                        dropout=self.dropout, reuse=True)

    with tf.variable_scope("Embedding_Encoder_Layer"):
        c = residual_block(c_emb,
                           num_blocks=1,
                           num_conv_layers=4,
                           kernel_size=7,
                           mask=self.c_mask,
                           num_filters=d,
                           num_heads=nh,
                           seq_len=self.c_len,
                           scope="Encoder_Residual_Block",
                           bias=False,
                           dropout=self.dropout)
        q = residual_block(q_emb,
                           num_blocks=1,
                           num_conv_layers=4,
                           kernel_size=7,
                           mask=self.q_mask,
                           num_filters=d,
                           num_heads=nh,
                           seq_len=self.q_len,
                           scope="Encoder_Residual_Block",
                           reuse=True,  # Share the weights between passage and question
                           bias=False,
                           dropout=self.dropout)

    with tf.variable_scope("Context_to_Query_Attention_Layer"):
        C = tf.tile(tf.expand_dims(c, 2), [1, 1, self.q_maxlen, 1])
        Q = tf.tile(tf.expand_dims(q, 1), [1, self.c_maxlen, 1, 1])
        S = trilinear([C, Q, C * Q], input_keep_prob=1.0 - self.dropout)
        mask_q = tf.expand_dims(self.q_mask, 1)
        S_ = tf.nn.softmax(mask_logits(S, mask=mask_q))
        mask_c = tf.expand_dims(self.c_mask, 2)
        S_T = tf.transpose(
            tf.nn.softmax(mask_logits(S, mask=mask_c), dim=1), (0, 2, 1))
        self.c2q = tf.matmul(S_, q)
        self.q2c = tf.matmul(tf.matmul(S_, S_T), c)
        attention_outputs = [c, self.c2q, c * self.c2q]
        if config.q2c:
            attention_outputs.append(c * self.q2c)

    with tf.variable_scope("Model_Encoder_Layer"):
        inputs = tf.concat(attention_outputs, axis=-1)
        self.enc = [conv(inputs, d, name="input_projection")]
        for i in range(3):
            if i % 2 == 0:  # dropout every 2 blocks
                self.enc[i] = tf.nn.dropout(self.enc[i], 1.0 - self.dropout)
            self.enc.append(
                residual_block(self.enc[i],
                               num_blocks=7,
                               num_conv_layers=2,
                               kernel_size=5,
                               mask=self.c_mask,
                               num_filters=d,
                               num_heads=nh,
                               seq_len=self.c_len,
                               scope="Model_Encoder",
                               bias=False,
                               reuse=True if i > 0 else None,
                               dropout=self.dropout))

    with tf.variable_scope("Output_Layer"):
        start_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[2]], axis=-1), 1,
                 bias=False, name="start_pointer"), -1)
        end_logits = tf.squeeze(
            conv(tf.concat([self.enc[1], self.enc[3]], axis=-1), 1,
                 bias=False, name="end_pointer"), -1)
        self.logits = [
            mask_logits(start_logits, mask=self.c_mask),
            mask_logits(end_logits, mask=self.c_mask)
        ]

        logits1, logits2 = self.logits

        # Joint start/end probabilities, restricted to end >= start and
        # end - start <= 15.
        outer = tf.matmul(tf.expand_dims(tf.nn.softmax(logits1), axis=2),
                          tf.expand_dims(tf.nn.softmax(logits2), axis=1))
        outer = tf.matrix_band_part(outer, 0, 15)
        self.yp1 = tf.argmax(tf.reduce_max(outer, axis=2), axis=1)
        self.yp2 = tf.argmax(tf.reduce_max(outer, axis=1), axis=1)
        losses = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits1, labels=self.y1)
        losses2 = tf.nn.softmax_cross_entropy_with_logits(
            logits=logits2, labels=self.y2)
        self.loss = tf.reduce_mean(losses + losses2)

    if config.l2_norm is not None:
        variables = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        l2_loss = tf.contrib.layers.apply_regularization(
            regularizer, variables)
        self.loss += l2_loss

    if config.decay is not None:
        self.var_ema = tf.train.ExponentialMovingAverage(config.decay)
        ema_op = self.var_ema.apply(tf.trainable_variables())
        # NOTE(review): the nesting of the loops below under the control
        # dependency is reconstructed from collapsed source -- confirm.
        with tf.control_dependencies([ema_op]):
            # Evaluating the loss now also triggers the EMA update.
            self.loss = tf.identity(self.loss)

            self.shadow_vars = []
            self.global_vars = []
            for var in tf.global_variables():
                v = self.var_ema.average(var)
                # average() returns None for variables the EMA does not
                # track. Compare against None explicitly instead of
                # truth-testing a variable object.
                if v is not None:
                    self.shadow_vars.append(v)
                    self.global_vars.append(var)
            self.assign_vars = []
            for g, v in zip(self.global_vars, self.shadow_vars):
                self.assign_vars.append(tf.assign(g, v))
def PointASNLSetAbstraction(xyz, feature, npoint, nsample, mlp, is_training, bn_decay, weight_decay, scope, bn=True, use_knn=True, radius=None, as_neighbor=8, NL=True):
    """Set-abstraction layer combining local, skip and (optionally) non-local features.

    Input:
        xyz: (batch_size, ndataset, 3) TF tensor
        feature: (batch_size, ndataset, channel) TF tensor
        npoint: int32 -- #points sampled in Euclidean space by farthest point
            sampling; sampling is skipped when it equals the number of input points
        nsample: int32 -- how many points in each local region
        mlp: list of int32 -- output size for MLP on each point
        is_training, bn_decay, weight_decay, bn: forwarded unchanged to the
            convolution helpers and sub-modules
        scope: variable scope name, also forwarded to AdaptiveSampling and
            PointNonLocalCell
        use_knn, radius: forwarded to `grouping`
        as_neighbor: forwarded to `AdaptiveSampling`
        NL: when true, the PointNonLocalCell output is added to the result
    Return:
        new_xyz: (batch_size, npoint, 3) TF tensor
        new_point: (batch_size, npoint, mlp[-1]) TF tensor
    """
    with tf.variable_scope(scope):
        _, num_points, num_channel = feature.get_shape()
        needs_sampling = num_points != npoint

        # Farthest Point Sampling
        if needs_sampling:
            new_xyz, center_feat = sampling(npoint, xyz, feature)
        else:
            new_xyz, center_feat = xyz, feature

        neighbor_xyz, grouped_feat, _ = grouping(
            feature, nsample, xyz, new_xyz, use_knn=use_knn, radius=radius)
        nl_channel = mlp[-1]

        # Adaptive Sampling
        if needs_sampling:
            new_xyz, center_feat = AdaptiveSampling(
                neighbor_xyz, grouped_feat, as_neighbor, is_training,
                bn_decay, weight_decay, scope, bn)
        # translation normalization
        neighbor_xyz = neighbor_xyz - tf.tile(
            tf.expand_dims(new_xyz, 2), [1, 1, nsample, 1])
        grouped_feat = tf.concat([neighbor_xyz, grouped_feat], axis=-1)

        # Point NonLocal Cell
        if NL:
            nonlocal_feat = PointNonLocalCell(
                feature, tf.expand_dims(center_feat, axis=1),
                [max(32, num_channel//2), nl_channel],
                is_training, bn_decay, weight_decay, scope, bn)

        # Skip Connection
        skip_feat = tf_util.conv1d(
            tf.reduce_max(grouped_feat, axis=[2]), mlp[-1], 1,
            padding='VALID', stride=1, bn=bn, is_training=is_training,
            scope='skip', bn_decay=bn_decay, weight_decay=weight_decay)

        # Point Local Cell: every MLP width except the last gets a 1x1 conv;
        # the last width is applied by 'after_conv' below.
        for layer_idx, width in enumerate(mlp[:-1]):
            grouped_feat = tf_util.conv2d(
                grouped_feat, width, [1, 1], padding='VALID', stride=[1, 1],
                bn=bn, is_training=is_training, scope='conv%d' % layer_idx,
                bn_decay=bn_decay, weight_decay=weight_decay)

        kernel_weight = weight_net_hidden(
            neighbor_xyz, [32], scope='weight_net', is_training=is_training,
            bn_decay=bn_decay, weight_decay=weight_decay)
        local_feat = tf.matmul(
            tf.transpose(grouped_feat, [0, 1, 3, 2]), kernel_weight)
        local_feat = tf_util.conv2d(
            local_feat, mlp[-1], [1, local_feat.get_shape()[2].value],
            padding='VALID', stride=[1, 1], bn=bn, is_training=is_training,
            scope='after_conv', bn_decay=bn_decay, weight_decay=weight_decay)
        local_feat = tf.squeeze(local_feat, [2])  # (batch_size, npoints, mlp[-1])

        fused = tf.add(local_feat, skip_feat)
        if NL:
            fused = tf.add(fused, nonlocal_feat)

        # Feature Fusion
        fused = tf_util.conv1d(
            fused, mlp[-1], 1, padding='VALID', stride=1, bn=bn,
            is_training=is_training, scope='aggregation',
            bn_decay=bn_decay, weight_decay=weight_decay)

        return new_xyz, fused
def ssd_model_fn(features, labels, mode, params):
    """model_fn for SSD to be used with our Estimator.

    Builds the VGG16 backbone and multibox head, decodes the predicted boxes,
    applies hard negative mining to the classification targets and assembles
    the cross-entropy + localization + L2 loss.

    Args:
        features: input batch fed to the backbone; its leading dimension is
            used as the batch size when reshaping predictions.
        labels: dict read with the keys 'shape', 'loc_targets', 'cls_targets'
            and 'match_scores'.
        mode: a `tf.estimator.ModeKeys` value.
        params: dict of hyper-parameters. Keys read here: 'model_scope',
            'data_format', 'num_classes', 'negative_ratio', 'weight_decay',
            and in TRAIN mode additionally 'learning_rate',
            'lr_decay_factors', 'decay_boundaries', 'end_learning_rate' and
            'momentum'.

    Returns:
        A `tf.estimator.EstimatorSpec`. In PREDICT mode it carries only the
        predictions; otherwise it also carries the total loss, the accuracy
        metric, a scaffold and (in TRAIN mode) the train op.

    NOTE(review): the nesting of the `with` blocks below was reconstructed
    from a whitespace-collapsed source -- confirm the extent of the
    variable_scope / device / name_scope blocks against the original file.
    NOTE(review): `labels` is indexed before the PREDICT check, so this
    presumably requires a labels dict in every mode -- verify against callers.
    """
    shape = labels['shape']
    loc_targets = labels['loc_targets']
    cls_targets = labels['cls_targets']
    match_scores = labels['match_scores']

    # Anchor metadata comes from a module-level dict rather than `params`.
    global global_anchor_info
    decode_fn = global_anchor_info['decode_fn']
    num_anchors_per_layer = global_anchor_info['num_anchors_per_layer']
    all_num_anchors_depth = global_anchor_info['all_num_anchors_depth']

    # bboxes_pred = decode_fn(loc_targets[0])
    # bboxes_pred = [tf.reshape(preds, [-1, 4]) for preds in bboxes_pred]
    # bboxes_pred = tf.concat(bboxes_pred, axis=0)
    # save_image_op = tf.py_func(save_image_with_bbox,
    #                            [ssd_preprocessing.unwhiten_image(features[0]),
    #                             tf.clip_by_value(cls_targets[0], 0, tf.int64.max),
    #                             match_scores[0],
    #                             bboxes_pred],
    #                            tf.int64, stateful=True)
    # with tf.control_dependencies([save_image_op]):

    #print(all_num_anchors_depth)
    with tf.variable_scope(params['model_scope'], default_name=None, values=[features], reuse=tf.AUTO_REUSE):
        backbone = ssd_net.VGG16Backbone(params['data_format'])
        feature_layers = backbone.forward(
            features, training=(mode == tf.estimator.ModeKeys.TRAIN))
        #print(feature_layers)
        location_pred, cls_pred = ssd_net.multibox_head(
            feature_layers, params['num_classes'], all_num_anchors_depth,
            data_format=params['data_format'])

        # Move channels last so the per-anchor values are contiguous before
        # the reshapes below.
        if params['data_format'] == 'channels_first':
            cls_pred = [tf.transpose(pred, [0, 2, 3, 1]) for pred in cls_pred]
            location_pred = [
                tf.transpose(pred, [0, 2, 3, 1]) for pred in location_pred
            ]

        # Per feature layer: [batch, anchors_in_layer, num_classes or 4].
        cls_pred = [
            tf.reshape(pred,
                       [tf.shape(features)[0], -1, params['num_classes']])
            for pred in cls_pred
        ]
        location_pred = [
            tf.reshape(pred, [tf.shape(features)[0], -1, 4])
            for pred in location_pred
        ]

        # Concatenate all layers along the anchor axis, then flatten batch and
        # anchor axes together.
        cls_pred = tf.concat(cls_pred, axis=1)
        location_pred = tf.concat(location_pred, axis=1)

        cls_pred = tf.reshape(cls_pred, [-1, params['num_classes']])
        location_pred = tf.reshape(location_pred, [-1, 4])

    with tf.device('/cpu:0'):
        with tf.control_dependencies([cls_pred, location_pred]):
            with tf.name_scope('post_forward'):
                #bboxes_pred = decode_fn(location_pred)
                # Decode each image separately; decode_fn yields one tensor
                # per anchor layer (hence the list-valued dtype).
                bboxes_pred = tf.map_fn(
                    lambda _preds: decode_fn(_preds),
                    tf.reshape(location_pred, [tf.shape(features)[0], -1, 4]),
                    dtype=[tf.float32] * len(num_anchors_per_layer),
                    back_prop=False)
                #cls_targets = tf.Print(cls_targets, [tf.shape(bboxes_pred[0]),tf.shape(bboxes_pred[1]),tf.shape(bboxes_pred[2]),tf.shape(bboxes_pred[3])])
                bboxes_pred = [
                    tf.reshape(preds, [-1, 4]) for preds in bboxes_pred
                ]
                bboxes_pred = tf.concat(bboxes_pred, axis=0)

                flaten_cls_targets = tf.reshape(cls_targets, [-1])
                flaten_match_scores = tf.reshape(match_scores, [-1])
                flaten_loc_targets = tf.reshape(loc_targets, [-1, 4])

                # each positive examples has one label
                positive_mask = flaten_cls_targets > 0
                n_positives = tf.count_nonzero(positive_mask)

                # Per-image counts (reduced over the last axis).
                batch_n_positives = tf.count_nonzero(cls_targets, -1)

                batch_negtive_mask = tf.equal(
                    cls_targets, 0
                )  #tf.logical_and(tf.equal(cls_targets, 0), match_scores > 0.)
                batch_n_negtives = tf.count_nonzero(batch_negtive_mask, -1)

                # Keep at most negative_ratio negatives per positive, capped
                # by the number of negatives actually available.
                batch_n_neg_select = tf.cast(
                    params['negative_ratio'] *
                    tf.cast(batch_n_positives, tf.float32), tf.int32)
                batch_n_neg_select = tf.minimum(
                    batch_n_neg_select, tf.cast(batch_n_negtives, tf.int32))

                # hard negative mining for classification
                predictions_for_bg = tf.nn.softmax(
                    tf.reshape(
                        cls_pred,
                        [tf.shape(features)[0], -1, params['num_classes']
                         ]))[:, :, 0]
                # Negated background probability: a higher score means a
                # harder negative. Non-negatives get -1 so they sort last.
                prob_for_negtives = tf.where(
                    batch_negtive_mask,
                    0. - predictions_for_bg,
                    # ignore all the positives
                    0. - tf.ones_like(predictions_for_bg))
                topk_prob_for_bg, _ = tf.nn.top_k(
                    prob_for_negtives, k=tf.shape(prob_for_negtives)[1])
                # Score of the last selected negative for each image.
                # NOTE(review): batch_n_neg_select is 0 for an image without
                # positives, which makes this index -1 -- confirm inputs
                # always contain a positive or that gather_nd tolerates it.
                score_at_k = tf.gather_nd(
                    topk_prob_for_bg,
                    tf.stack([
                        tf.range(tf.shape(features)[0]), batch_n_neg_select - 1
                    ],
                             axis=-1))

                selected_neg_mask = prob_for_negtives >= tf.expand_dims(
                    score_at_k, axis=-1)

                # include both selected negtive and all positive examples
                final_mask = tf.stop_gradient(
                    tf.logical_or(
                        tf.reshape(
                            tf.logical_and(batch_negtive_mask,
                                           selected_neg_mask), [-1]),
                        positive_mask))
                total_examples = tf.count_nonzero(final_mask)

                # Classification uses positives + mined negatives;
                # localization uses positives only.
                cls_pred = tf.boolean_mask(cls_pred, final_mask)
                location_pred = tf.boolean_mask(
                    location_pred, tf.stop_gradient(positive_mask))
                flaten_cls_targets = tf.boolean_mask(
                    tf.clip_by_value(flaten_cls_targets, 0,
                                     params['num_classes']), final_mask)
                flaten_loc_targets = tf.stop_gradient(
                    tf.boolean_mask(flaten_loc_targets, positive_mask))

                predictions = {
                    'classes':
                    tf.argmax(cls_pred, axis=-1),
                    'probabilities':
                    tf.reduce_max(tf.nn.softmax(cls_pred,
                                                name='softmax_tensor'),
                                  axis=-1),
                    'loc_predict':
                    bboxes_pred
                }

                cls_accuracy = tf.metrics.accuracy(flaten_cls_targets,
                                                   predictions['classes'])
                metrics = {'cls_accuracy': cls_accuracy}

                # Create a tensor named cls_accuracy for logging purposes.
                tf.identity(cls_accuracy[1], name='cls_accuracy')
                tf.summary.scalar('cls_accuracy', cls_accuracy[1])

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    # Calculate loss, which includes softmax cross entropy and L2 regularization.
    #cross_entropy = tf.cond(n_positives > 0, lambda: tf.losses.sparse_softmax_cross_entropy(labels=flaten_cls_targets, logits=cls_pred), lambda: 0.)# * (params['negative_ratio'] + 1.)
    #flaten_cls_targets=tf.Print(flaten_cls_targets, [flaten_loc_targets],summarize=50000)
    cross_entropy = tf.losses.sparse_softmax_cross_entropy(
        labels=flaten_cls_targets,
        logits=cls_pred) * (params['negative_ratio'] + 1.)
    # Create a tensor named cross_entropy_loss for logging purposes.
    tf.identity(cross_entropy, name='cross_entropy_loss')
    tf.summary.scalar('cross_entropy_loss', cross_entropy)

    #loc_loss = tf.cond(n_positives > 0, lambda: modified_smooth_l1(location_pred, tf.stop_gradient(flaten_loc_targets), sigma=1.), lambda: tf.zeros_like(location_pred))
    loc_loss = modified_smooth_l1(location_pred, flaten_loc_targets, sigma=1.)
    #loc_loss = modified_smooth_l1(location_pred, tf.stop_gradient(gtargets))
    # Sum over the 4 box coordinates, then average over positive anchors.
    loc_loss = tf.reduce_mean(tf.reduce_sum(loc_loss, axis=-1),
                              name='location_loss')
    tf.summary.scalar('location_loss', loc_loss)
    tf.losses.add_loss(loc_loss)

    # Collect L2 terms for every trainable variable whose name does not
    # contain '_bn'; the 'conv4_3_scale' variable is weighted by 0.1.
    l2_loss_vars = []
    for trainable_var in tf.trainable_variables():
        if '_bn' not in trainable_var.name:
            if 'conv4_3_scale' not in trainable_var.name:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var))
            else:
                l2_loss_vars.append(tf.nn.l2_loss(trainable_var) * 0.1)
    # Add weight decay to the loss. We exclude the batch norm variables because
    # doing so leads to a small improvement in accuracy.
    total_loss = tf.add(cross_entropy + loc_loss,
                        tf.multiply(params['weight_decay'],
                                    tf.add_n(l2_loss_vars),
                                    name='l2_loss'),
                        name='total_loss')

    if mode == tf.estimator.ModeKeys.TRAIN:
        global_step = tf.train.get_or_create_global_step()

        # Piecewise-constant schedule, floored at 'end_learning_rate'.
        lr_values = [
            params['learning_rate'] * decay
            for decay in params['lr_decay_factors']
        ]
        learning_rate = tf.train.piecewise_constant(
            tf.cast(global_step, tf.int32),
            [int(_) for _ in params['decay_boundaries']], lr_values)
        truncated_learning_rate = tf.maximum(learning_rate,
                                             tf.constant(
                                                 params['end_learning_rate'],
                                                 dtype=learning_rate.dtype),
                                             name='learning_rate')
        # Create a tensor named learning_rate for logging purposes.
        tf.summary.scalar('learning_rate', truncated_learning_rate)

        optimizer = tf.train.MomentumOptimizer(
            learning_rate=truncated_learning_rate,
            momentum=params['momentum'])
        optimizer = tf.contrib.estimator.TowerOptimizer(optimizer)

        # Batch norm requires update_ops to be added as a train_op dependency.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(total_loss, global_step)
    else:
        train_op = None

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops=metrics,
        scaffold=tf.train.Scaffold(init_fn=get_init_fn()))
def __init__(self, env, q_func, optimizer_spec, session, exploration,
             replay_buffer_size, batch_size, gamma, learning_starts,
             learning_freq, frame_history_len, target_update_freq,
             grad_norm_clipping, double_q=True, logdir=None, max_steps=2e8,
             cartpole=False):
    """Run Deep Q-learning algorithm.

    You can specify your own convnet using `q_func`.
    All schedules are w.r.t. total number of steps taken in the environment.

    Parameters
    ----------
    env: gym.Env
        gym environment to train on.
    q_func: function
        Model to use for computing the q function. It should accept the
        following named arguments:
            img_in: tf.Tensor
                tensorflow tensor representing the input image
            num_actions: int
                number of actions
            scope: str
                scope in which all the model related variables
                should be created
            reuse: bool
                whether previously created variables should be reused.
    optimizer_spec: OptimizerSpec
        Specifying the constructor and kwargs, as well as learning rate schedule
        for the optimizer
    session: tf.Session
        tensorflow session to use.
    exploration: Schedule
        schedule for probability of choosing random action.
    replay_buffer_size: int
        How many memories to store in the replay buffer.
    batch_size: int
        How many transitions to sample each time experience is replayed.
    gamma: float
        Discount Factor
    learning_starts: int
        After how many environment steps to start replaying experiences
    learning_freq: int
        How many steps of environment to take between every experience replay
    frame_history_len: int
        How many past frames to include as input to the model.
    target_update_freq: int
        How many experience replay rounds (not steps!) to perform between
        each update to the target Q network
    grad_norm_clipping: float or None
        If not None gradients' norms are clipped to this value.
    double_q: bool
        If True, use double Q-learning to compute target values: the next
        action is chosen by the online network and evaluated by the target
        network. Otherwise, vanilla DQN (max over the target network).
        https://papers.nips.cc/paper/3964-double-q-learning.pdf
    logdir: str
        Where we save the results for plotting later. Not read by this
        constructor.
    max_steps: int
        Maximum number of training steps. The number of *frames* is 4x this
        quantity (modulo the initial random no-op steps).
    cartpole: bool
        If True, CartPole-v0. Else, PongNoFrameskip-v4
    """
    assert type(env.observation_space) == gym.spaces.Box
    assert type(env.action_space) == gym.spaces.Discrete

    self.max_steps = int(max_steps)
    self.target_update_freq = target_update_freq
    self.optimizer_spec = optimizer_spec
    self.batch_size = batch_size
    self.learning_freq = learning_freq
    self.learning_starts = learning_starts
    self.session = session
    self.exploration = exploration
    self.double_q = double_q
    self.cartpole = cartpole
    self.env = env

    if cartpole:
        input_shape = self.env.observation_space.shape  # should be (4,)
    else:
        # Image observations: the frame history is stacked along channels.
        img_h, img_w, img_c = self.env.observation_space.shape
        input_shape = (img_h, img_w, frame_history_len * img_c)
    self.num_actions = self.env.action_space.n

    # ----------------------------------------------------------------------
    # Set up TensorFlow placeholders for:
    #
    #   - current observation (or state)
    #   - current action
    #   - current reward
    #   - next observation (or state)
    #   - end of episode mask
    #
    # For the end of episode mask: value is 1 if the next state corresponds
    # to the end of an episode, in which case there is no Q-value at the
    # next state; at the end of an episode, only the current state reward
    # contributes to the target, not the next state Q-value (i.e. target is
    # just rew_t_ph, not rew_t_ph + gamma * q_tp1).
    # ----------------------------------------------------------------------
    if cartpole:
        self.obs_t_ph = tf.placeholder(tf.float32, [None] + list(input_shape))
        self.obs_tp1_ph = tf.placeholder(tf.float32, [None] + list(input_shape))
    else:
        self.obs_t_ph = tf.placeholder(tf.uint8, [None] + list(input_shape))
        self.obs_tp1_ph = tf.placeholder(tf.uint8, [None] + list(input_shape))
    self.act_t_ph = tf.placeholder(tf.int32, [None])
    self.rew_t_ph = tf.placeholder(tf.float32, [None])
    self.done_mask_ph = tf.placeholder(tf.float32, [None])

    # Casting to float on GPU ensures lower data transfer times.
    if cartpole:
        obs_t_float = self.obs_t_ph
        obs_tp1_float = self.obs_tp1_ph
    else:
        obs_t_float = tf.cast(self.obs_t_ph, tf.float32) / 255.0
        obs_tp1_float = tf.cast(self.obs_tp1_ph, tf.float32) / 255.0

    # ----------------------------------------------------------------------
    # Bellman error.
    #
    # Two networks are built: the online network under scope "q_func" and
    # the target network under scope "target_q_func". Only the online
    # network's variables are handed to the optimizer below.
    # ----------------------------------------------------------------------
    q_curr = q_func(obs_t_float, self.num_actions, scope='q_func', reuse=False)
    self.best_act = tf.argmax(q_curr, axis=1)
    q_next_target = q_func(obs_tp1_float, self.num_actions,
                           scope='target_q_func', reuse=False)

    if self.double_q:
        # Double DQN: pick the next action with the ONLINE network and read
        # its value from the TARGET network. Selecting and evaluating with
        # the same (target) network would reduce to the plain max used by
        # vanilla DQN.
        q_next_online = q_func(obs_tp1_float, self.num_actions,
                               scope='q_func', reuse=True)
        best_act_next = tf.argmax(q_next_online, axis=1, output_type=tf.int32)
        next_rows = tf.range(tf.shape(q_next_target)[0])
        next_q = tf.gather_nd(q_next_target,
                              tf.stack([next_rows, best_act_next], axis=1))
    else:
        next_q = tf.reduce_max(q_next_target, axis=1)

    # The (1 - done) factor drops the bootstrap term at episode ends.
    Y_best = self.rew_t_ph + gamma * next_q * (1.0 - self.done_mask_ph)

    # Q-value of the action that was actually taken in each transition.
    curr_rows = tf.range(tf.shape(q_curr)[0])
    Y_actual = tf.gather_nd(q_curr, tf.stack([curr_rows, self.act_t_ph], axis=1))

    # NOTE(review): the contract for `total_error` is a scalar tensor; if
    # huber_loss is element-wise this is a per-sample vector -- confirm
    # whether a reduction is intended before changing the gradient scale.
    self.total_error = huber_loss(Y_best - Y_actual)

    q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                    scope='q_func')
    target_q_func_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                           scope='target_q_func')

    # Construct optimization op (with gradient clipping).
    self.learning_rate = tf.placeholder(tf.float32, (), name="learning_rate")
    optimizer = self.optimizer_spec.constructor(
        learning_rate=self.learning_rate, **self.optimizer_spec.kwargs)
    self.train_fn = minimize_and_clip(optimizer, self.total_error,
                                      var_list=q_func_vars,
                                      clip_val=grad_norm_clipping)

    # update_target_fn will be called periodically to copy Q network to
    # target Q network. Variables are paired by sorted name.
    update_target_fn = [
        var_target.assign(var)
        for var, var_target in zip(
            sorted(q_func_vars, key=lambda v: v.name),
            sorted(target_q_func_vars, key=lambda v: v.name))
    ]
    self.update_target_fn = tf.group(*update_target_fn)

    # Construct the replay buffer
    self.replay_buffer = ReplayBuffer(replay_buffer_size, frame_history_len,
                                      cartpole=cartpole)
    self.replay_buffer_idx = None

    # Bells and whistles. Note the `self.env.reset()` call, though!
    self.model_initialized = False
    self.num_param_updates = 0
    self.mean_episode_reward = -float('nan')
    self.std_episode_reward = -float('nan')
    self.best_mean_episode_reward = -float('inf')
    if cartpole:
        self.log_every_n_steps = 1000
    else:
        self.log_every_n_steps = 10000
    self.start_time = time.time()
    self.last_obs = self.env.reset()
    self.t = 0
def resize_to_range(image, skel, skel_35, label=None, label_order=None, min_size=None, max_size=None, factor=None, align_corners=True, label_layout_is_chw=False, scope=None, method=tf.image.ResizeMethod.BILINEAR):
    """Resizes image, skeleton maps and labels so their sides are within the provided range.

    The output size can be described by two cases:
    1. If the image can be rescaled so its minimum size is equal to min_size
       without the other side exceeding max_size, then do so.
    2. Otherwise, resize so the largest side is equal to max_size.

    An integer in `range(factor)` is added to the computed sides so that the
    final dimensions are multiples of `factor` plus one.

    Args:
      image: A 3D tensor of shape [height, width, channels]. The target size
        is derived from this tensor only.
      skel: Tensor resized to the same size and with the same `method` as
        `image`.
      skel_35: Tensor resized to the same size and with the same `method` as
        `image`.
      label: (optional) A 3D tensor of shape [height, width, channels]
        (default) or [channels, height, width] when label_layout_is_chw = True.
        Resized with nearest-neighbor interpolation.
      label_order: (optional) Tensor with the same layout convention as
        `label`; resized exactly like `label`.
      min_size: (scalar) desired size of the smaller image side. Converted
        with `tf.to_float` unconditionally, so it must be provided despite
        the `None` default.
      max_size: (scalar) maximum allowed size of the larger image side. Note
        that the output dimension is no larger than max_size and may be
        slightly smaller than min_size when factor is not None.
      factor: Make output size multiple of factor plus one.
      align_corners: If True, exactly align all 4 corners of input and output.
      label_layout_is_chw: If true, the label has shape [channel, height,
        width]. We support this case because for some instance segmentation
        dataset, the instance segmentation is saved as
        [num_instances, height, width].
      scope: Optional name scope.
      method: Image resize method. Defaults to tf.image.ResizeMethod.BILINEAR.

    Returns:
      A list `[image, skel, skel_35, label, label_order]` of resized tensors,
      where the `label` / `label_order` slots hold `None` when the
      corresponding argument was `None`.

    Raises:
      ValueError: If the image is not a 3D tensor (presumably raised by
        `resolve_shape` -- confirm).
    """
    with tf.name_scope(scope, 'resize_to_range', [image]):
        min_size = tf.to_float(min_size)
        has_max = max_size is not None
        has_factor = factor is not None

        if has_max:
            max_size = tf.to_float(max_size)
            # Modify the max_size to be a multiple of factor plus 1 and make
            # sure the max dimension after resizing is no larger than max_size.
            if has_factor:
                max_size = (max_size + (factor - (max_size - 1) % factor) % factor
                            - factor)

        height, width, _ = resolve_shape(image, rank=3)
        height = tf.to_float(height)
        width = tf.to_float(width)

        def _scaled_size(scale):
            # [ceil(height * scale), ceil(width * scale)] as int32.
            return tf.stack([
                tf.to_int32(tf.ceil(height * scale)),
                tf.to_int32(tf.ceil(width * scale)),
            ])

        # Size that brings the smaller side to min_size.
        shorter_side = tf.minimum(height, width)
        size_from_min = _scaled_size(min_size / shorter_side)

        target_size = size_from_min
        if has_max:
            # Size that brings the larger side to max_size; used only when
            # the min_size-based result would be too big.
            longer_side = tf.maximum(height, width)
            size_from_max = _scaled_size(max_size / longer_side)
            target_size = tf.cond(
                tf.to_float(tf.reduce_max(size_from_min)) > max_size,
                lambda: size_from_max,
                lambda: size_from_min)

        # Ensure that both output sides are multiples of factor plus one.
        if has_factor:
            target_size = target_size + (factor - (target_size - 1) % factor) % factor

        def _resize_nearest(mask):
            # Nearest-neighbor resize honoring the configured label layout.
            if label_layout_is_chw:
                # Input has shape [channel, height, width].
                expanded = tf.expand_dims(mask, 3)
                expanded = tf.image.resize_nearest_neighbor(
                    expanded, target_size, align_corners=align_corners)
                return tf.squeeze(expanded, 3)
            # Input has shape [height, width, channel].
            return tf.image.resize_images(
                mask, target_size,
                method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                align_corners=align_corners)

        resized = [
            tf.image.resize_images(
                tensor, target_size, method=method, align_corners=align_corners)
            for tensor in (image, skel, skel_35)
        ]
        for mask in (label, label_order):
            resized.append(None if mask is None else _resize_nearest(mask))
        return resized
def gumbel_softmax(x, name, z_size, mode, softmax_k=0, kl_warmup_steps=150000, summary=True):
    """Gumbel softmax discretization bottleneck.

    Args:
      x: Input to the discretization bottleneck.
      name: Name for the bottleneck scope.
      z_size: Number of bits used to produce discrete code; the code
        distribution has 2**z_size entries.
      mode: Mode represents whether we are training or testing for bottlenecks
        that differ in behavior; compared against
        tf.contrib.learn.ModeKeys.TRAIN.
      softmax_k: If > 0 then do top-k softmax and return early (Default: 0).
      kl_warmup_steps: Number of steps for kl warmup (Default: 150000).
      summary: If True, then write summaries (Default: True).

    Returns:
      A 3-tuple. With softmax_k > 0: the top-k softmax output twice, followed
      by `1.0 - mean(kl)`. Otherwise: the softmax over the code logits, the
      discrete code (Gumbel-softmax sample in TRAIN mode, one-hot argmax in
      any other mode) and the loss.
    """
    num_codes = 2**z_size
    with tf.variable_scope(name):
        logits = tf.layers.dense(x, num_codes, name="mask")
        if softmax_k > 0:
            topk_probs, topk_kl = top_k_softmax(logits, softmax_k)
            return topk_probs, topk_probs, 1.0 - tf.reduce_mean(topk_kl)

        log_probs = tf.nn.log_softmax(logits)

        # Gumbel-softmax sample: scaled noise and a temperature that both
        # follow decay schedules parameterized by kl_warmup_steps.
        noise = gumbel_sample(common_layers.shape_list(logits))
        noise_scale = common_layers.inverse_exp_decay(kl_warmup_steps // 5) * 0.5
        noise = noise * noise_scale
        scheduled_temperature = 1.2 - common_layers.inverse_lin_decay(
            kl_warmup_steps)

        # 10% of the time keep reasonably high temperature to keep learning.
        temperature = tf.cond(
            tf.less(tf.random_uniform([]), 0.9),
            lambda: scheduled_temperature,
            lambda: tf.random_uniform([], minval=0.5, maxval=1.0))
        soft_sample = tf.nn.softmax((log_probs + noise) / temperature)
        probs = tf.nn.softmax(logits)
        neg_max_log_prob = -tf.reduce_max(log_probs, axis=-1)

        if summary:
            tf.summary.histogram("max-log", tf.reshape(neg_max_log_prob, [-1]))

        # Calculate the argmax and construct hot vectors.
        best_code = tf.reshape(tf.argmax(probs, axis=-1), [-1])
        best_one_hot = tf.stop_gradient(tf.one_hot(best_code, num_codes))

        # Add losses that prevent too few being used.
        picked_log_probs = tf.reshape(log_probs, [-1, num_codes]) * best_one_hot
        code_mean = tf.reduce_mean(picked_log_probs, axis=[0], keep_dims=True)
        code_variance = tf.reduce_mean(
            tf.square(picked_log_probs - code_mean), axis=[0])
        usage_penalty = -tf.reduce_mean(code_variance)

        if mode != tf.contrib.learn.ModeKeys.TRAIN:
            # Just hot @eval.
            code = tf.reshape(best_one_hot, common_layers.shape_list(soft_sample))
        else:
            code = soft_sample

        loss = usage_penalty * 5.0 + tf.reduce_mean(neg_max_log_prob) * 0.002
        return probs, code, loss