def hidden_forward(self, l):
    """Forward pass through hidden layer ``l``.

    Stores the pre-activation in ``self.z[l]`` (presumably w·a[l-1] + b —
    depends on the external ``affine_transform`` helper) and the activated
    output in ``self.a[l]`` using the configured nonlinearity.
    """
    weights, bias = self.get_w_b(l)
    pre_activation = affine_transform(weights, self.a[l - 1], bias)
    self.z[l] = pre_activation
    # Apply the layer nonlinearity; unknown activation codes leave a[l] untouched.
    if self.activation == TANH:
        self.a[l] = np.tanh(pre_activation)
    elif self.activation == RELU:
        self.a[l] = np.maximum(0, pre_activation)
def __getitem__(self, idx):
    """Return one temporal clip: images, heatmap targets and crop metadata.

    For each of the ``self.temporal`` frames of sample ``idx``: load the
    image, derive a person-centred crop (centre ``c`` and scale ``s`` from
    the joint bounding box), warp image and 2-D joints into network space,
    and build the heatmap targets via ``self.generate_target``.

    Returns:
        train mode: (inputs, targets, target_weights)
        eval mode:  additionally (cs, ss, joints_true, last raw frame,
                    frame paths) for mapping predictions back to image space.
    """
    inputs = torch.zeros((self.temporal, 1, 256, 256))
    targets = torch.zeros((self.temporal, 13, 64, 64))
    target_weights = torch.zeros((self.temporal, 13, 3))
    cs = torch.zeros(self.temporal, 2)
    ss = torch.zeros(self.temporal, 2)
    # Renamed from the original typo ``joints_ture`` (local only; the
    # return is positional, so callers are unaffected).
    joints_true = torch.zeros(self.temporal, 13, 3)
    for k in range(self.temporal):
        img_path = self.temporal_dir[idx][k]
        joint = self.labels[idx][k]
        joint_true = self.joint_true[idx][k]
        data_numpy = mpimg.imread(img_path)
        # self.plot_2d(data_numpy, joint)
        joint = np.array(joint)
        joint_true = np.array(joint_true)
        u = joint[:, 0]
        v = joint[:, 1]
        # FIX: ``np.float`` was deprecated in NumPy 1.20 and removed in
        # 1.24; it was an alias for the builtin float (float64 dtype).
        # The redundant second cast of ``c`` is dropped.
        c = np.array([(max(u) + min(u)) / 2, (max(v) + min(v)) / 2],
                     dtype=np.float64)
        s = (max(v) - min(v)) * 0.0065
        s = np.array([s, s], dtype=np.float64)
        if c[0] != -1:
            # Shift the centre down and enlarge the box slightly so the
            # crop contains the whole person, not just the tight bbox.
            c[1] = c[1] + 15 * s[1]
            s = s * 1.25
        c = c - 1  # convert from 1-based to 0-based coordinates
        r = 0  # no rotation augmentation
        cs[k] = torch.from_numpy(c)
        ss[k] = torch.from_numpy(s)
        joints_true[k] = torch.from_numpy(joint_true)
        trans = get_affine_transform(c, s, r, self.image_size)
        warped = cv2.warpAffine(
            data_numpy, trans,
            (int(self.image_size[0]), int(self.image_size[1])),
            flags=cv2.INTER_LINEAR)
        # Map every joint into the cropped/warped coordinate frame.
        for i in range(13):
            # if joints_vis[i, 0] > 0.0:
            joint[i, 0:2] = affine_transform(joint[i, 0:2], trans)
        # print(joints_vis)
        # NOTE(review): joint is passed twice — presumably the second
        # argument stands in for joint visibility; confirm against
        # generate_target's signature.
        target, target_weight = self.generate_target(joint, joint)
        target = torch.from_numpy(target)
        target_weight = torch.from_numpy(target_weight)
        frame = torch.zeros((1, 256, 256))  # single-channel input tensor
        frame[0, :, :] = torch.from_numpy(warped)
        inputs[k] = frame
        targets[k] = target
        target_weights[k] = target_weight
    if self.train:
        return inputs, targets, target_weights
    else:
        return inputs, targets, target_weights, cs, ss, joints_true, \
            data_numpy, self.temporal_dir[idx]
def predict(self, image: np.ndarray) -> list:
    """Run detection on a single image and return detections in the
    original image's coordinate frame.

    The image is resized/normalised to 512x512 via ``preprocess_image``,
    optionally batched with its horizontal flip for flip-test, scored by
    the prediction model, thresholded on column 4 (score), and the box
    corners are mapped back and clipped to the source image bounds.
    """
    src_image = image.copy()
    h, w = image.shape[0], image.shape[1]
    c = np.array([w / 2., h / 2.], dtype=np.float32)
    s = max(h, w) * 1.0
    tgt_w = 512
    tgt_h = 512
    image = preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)
    # Build the network batch: [image, flipped] for flip-test, else just [image].
    if self.flip_test:
        batch = np.stack([image, image[:, ::-1]], axis=0)
    else:
        batch = np.expand_dims(image, axis=0)
    with self.sess.graph.as_default():
        predictions = self.prediction_model.predict_on_batch(batch)[0]
    # Keep only detections whose score (column 4) clears the threshold.
    keep = np.where(predictions[:, 4] > self.score_threshold)[0]
    predictions = predictions[keep].astype(np.float64)
    # Inverse transform from the (tgt/4)-sized network output grid back
    # to source-image pixels.
    trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)
    for j in range(predictions.shape[0]):
        predictions[j, 0:2] = affine_transform(predictions[j, 0:2], trans)
        predictions[j, 2:4] = affine_transform(predictions[j, 2:4], trans)
    predictions[:, [0, 2]] = np.clip(predictions[:, [0, 2]], 0, src_image.shape[1])
    predictions[:, [1, 3]] = np.clip(predictions[:, [1, 3]], 0, src_image.shape[0])
    return predictions
def chebys_tracer(coef_ord_combos, time_points, zipped: bool = False):
    """Generate pretraining samples that follow Chebyshev polynomials.

    Each (coefficient, order) combination yields one control trace: the
    polynomial is evaluated at every time point and rescaled from [-1, 1]
    into [0.1, 4.9] (a 0.1 safety margin inside [0, 5]).

    Returns a pair (combo tuples, traces); with ``zipped=True`` the traces
    are transposed so each element holds all controls at one time point.
    """
    margin = 0.1
    combos = []
    traces = []
    for combo, poly in chebys_generator(coef_ord_combos):
        trace = [
            affine_transform(poly(t), -1, 1, 0 + margin, 5 - margin)
            for t in time_points
        ]
        combos.append(combo)
        traces.append(trace)
    if zipped:
        return combos, list(zip(*traces))
    return combos, traces
def time_attention(self, inputs, reuse=None):
    """Compute per-time-step attention weights in (0, 1).

    Runs a stacked bidirectional LSTM over ``inputs``, flattens the
    concatenated forward/backward outputs, and squashes an affine
    projection (width ``data_step``) through a sigmoid.
    Returns a tensor of shape (batch, data_step, 1).
    """
    with tf.variable_scope("attention", reuse=reuse):
        layer_sizes = [self.num_cell_time] * self.num_layers
        fw_stack = tf.nn.rnn_cell.MultiRNNCell(
            cells=[tf.contrib.rnn.LSTMCell(
                       n, initializer=tf.contrib.layers.xavier_initializer(),
                       reuse=reuse)
                   for n in layer_sizes],
            state_is_tuple=True)
        bw_stack = tf.nn.rnn_cell.MultiRNNCell(
            cells=[tf.contrib.rnn.LSTMCell(
                       n, initializer=tf.contrib.layers.xavier_initializer(),
                       reuse=reuse)
                   for n in layer_sizes],
            state_is_tuple=True)
        rnn_outputs = tf.nn.bidirectional_dynamic_rnn(
            fw_stack, bw_stack, inputs,
            sequence_length=self.length(inputs), dtype=tf.float32)[0]
        # Concatenate fw/bw outputs and flatten time into the feature axis.
        flat = tf.reshape(
            tf.concat([rnn_outputs[0], rnn_outputs[1]], 2),
            shape=[-1, data_step * self.num_cell_time * 2])
        t_attention = tf.expand_dims(
            tf.sigmoid(utils.affine_transform(flat, data_step, seed=0,
                                              name='sigmoid')),
            axis=2)
        return t_attention
def inference(self, inputs, reuse=None):
    """Classification head: stacked bidirectional LSTM over ``inputs``.

    Takes the last relevant forward output and the last relevant output of
    the reversed backward sequence, concatenates them, and projects to
    ``self.num_class`` logits.
    """
    with tf.variable_scope("classification", reuse=reuse):
        layer_sizes = [self.num_cell] * self.num_layers
        fw_stack = tf.nn.rnn_cell.MultiRNNCell(
            cells=[tf.contrib.rnn.LSTMCell(
                       n, initializer=tf.contrib.layers.xavier_initializer(),
                       reuse=reuse)
                   for n in layer_sizes],
            state_is_tuple=True)
        bw_stack = tf.nn.rnn_cell.MultiRNNCell(
            cells=[tf.contrib.rnn.LSTMCell(
                       n, initializer=tf.contrib.layers.xavier_initializer(),
                       reuse=reuse)
                   for n in layer_sizes],
            state_is_tuple=True)
        rnn_outputs = tf.nn.bidirectional_dynamic_rnn(
            fw_stack, bw_stack, inputs,
            sequence_length=self.length(inputs), dtype=tf.float32)[0]
        # Pick the output at each sequence's true last step; the backward
        # stream is reversed first so "last relevant" means the same index.
        fw_last = self.last_relevant(rnn_outputs[0], self.length(inputs))
        bw_last = self.last_relevant(tf.reverse(rnn_outputs[1], axis=[1]),
                                     self.length(inputs))
        features = tf.concat([fw_last, bw_last], 1)
        logits = utils.affine_transform(features, self.num_class, seed=0,
                                        name='softmax_logits')
        return logits
def get_next_input(self, cell_output, reuse=None):
    """Produce the next glimpse for the recurrent attention model.

    From the core LSTM output: (1) predict a baseline value for variance
    reduction (appended to ``self.baselines``), (2) compute a soft window
    selection distribution ``mean_bp`` (appended to ``self.mean_bps``),
    (3) sample a hard one-hot selection during training (mean used as-is
    at eval time; appended to ``self.sampled_bps``), and (4) return the
    glimpse of the raw inputs under the *mean* selection.
    NOTE(review): the appends are order-sensitive — calc_reward later
    stacks these lists positionally.
    """
    raw_inputs = self.inputs
    is_training = self.is_training
    with tf.variable_scope("baseline", reuse=reuse):
        # Baseline in (0, 1) used as the REINFORCE variance reducer.
        baseline = tf.sigmoid(
            utils.affine_transform((((cell_output))), 1, name='baseline'))
        self.baselines.append(baseline)
    with tf.variable_scope("selection_network", reuse=reuse):
        # Soft attention over the bdnn window positions.
        mean_bp = smooth_softmax(
            utils.batch_norm_affine_transform((cell_output), int(bdnn_winlen),
                                              decay=decay, name='selection',
                                              is_training=is_training))
        # mean_bp = softmax(
        #     utils.batch_norm_affine_transform(cell_output, int(bdnn_winlen), decay=decay, name='selection',
        #                                       is_training=is_training), beta)
        self.mean_bps.append(mean_bp)
        # rand_seq = tf.random_uniform(mean_bp.get_shape().as_list(), minval=0, maxval=1, seed=SEED)
        if is_training:
            # Sample one window index per example, then one-hot encode it.
            sampled_bp = tf.multinomial(mean_bp, num_samples=1, seed=SEED)
            sampled_bp = utils.onehot_tensor(sampled_bp, bdnn_winlen)
        else:
            sampled_bp = mean_bp
        # Sampling is not differentiable; gradients flow only through mean_bp.
        sampled_bp = tf.stop_gradient(sampled_bp)
        self.sampled_bps.append(sampled_bp)
    # NOTE(review): the glimpse uses mean_bp (not the sampled window) and
    # forces reuse=True — confirm this matches the intended training scheme.
    return self.get_glimpse(raw_inputs, mean_bp, reuse=True)
def last_forward(self):
    """Forward pass through the output layer.

    Computes the final pre-activation from the last hidden activations and
    applies softmax; stores both in ``self.z``/``self.a`` and returns the
    softmax output.
    """
    out = self.L
    weights, bias = self.get_w_b(out)
    self.z[out] = affine_transform(weights, self.a[out - 1], bias)
    self.a[out] = softmax(self.z[out])
    return self.a[out]
def _selu_mlp(self, flat, keep_prob, width, out_units):
    """Three SELU hidden layers with dropout, then a linear output layer.

    Reuses the original variable names ('hidden_1'..'hidden_3', 'logits')
    so checkpoints remain compatible.
    """
    h = flat
    for i in range(1, 4):
        h = tf.nn.selu(utils.affine_transform(h, width, name='hidden_%d' % i))
        h = tf.nn.dropout(h, keep_prob=keep_prob)
    return utils.affine_transform(h, out_units, name='logits')

def inference(self, inputs):
    """Build the enhancement network selected by ``config.mode``.

    Modes: "fcn" (fully-convolutional with one residual skip), "fnn"/"irm"
    (SELU MLPs), "sfnn" (MLP with additive skip from the centre frame),
    "lstm" (stacked LSTM with output projection), "tsn" (MLP followed by a
    patch-based 2-D conv refinement; returns (fm, conv_9)).
    An unrecognised mode returns None, as before.

    FIX: the original compared strings with ``is`` (object identity),
    which relies on CPython literal interning and emits a SyntaxWarning on
    Python >= 3.8; ``==`` is the correct comparison.
    """
    if config.mode == "fcn":
        fm = utils.conv_with_bn(inputs, out_channels=12, filter_size=[config.time_width, 13],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_1")
        fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_2")
        fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_3")
        fm_skip = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                     stride=1, act='relu', is_training=self._is_training,
                                     padding="SAME", name="conv_4")
        fm = utils.conv_with_bn(fm_skip, out_channels=32, filter_size=[config.time_width, 7],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_5")
        # Residual connection around the bottleneck (conv_5/conv_6).
        fm = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_6") + fm_skip
        fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_7")
        fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_8")
        fm = utils.conv_with_bn(fm, out_channels=12, filter_size=[config.time_width, 13],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_9")
        fm = utils.conv_with_bn(fm, out_channels=1,
                                filter_size=[config.time_width, config.freq_size],
                                stride=1, act='linear', is_training=self._is_training,
                                padding="SAME", name="conv_10")
        # (batch_size, 1, config.freq_size, 1) -> (batch_size, config.freq_size)
        fm = tf.squeeze(fm, [1, 3])
        return fm
    elif config.mode == "fnn":
        keep_prob = self.keep_prob
        flat = tf.reshape(tf.squeeze(inputs, [3]),
                          (-1, int(config.time_width * config.freq_size)))
        flat = tf.nn.dropout(flat, keep_prob=keep_prob)
        return self._selu_mlp(flat, keep_prob, 2048, config.freq_size)
    elif config.mode == "irm":
        # Identical network to "fnn"; only the training target differs
        # (ideal ratio mask), which is handled outside this method.
        keep_prob = self.keep_prob
        flat = tf.reshape(tf.squeeze(inputs, [3]),
                          (-1, int(config.time_width * config.freq_size)))
        flat = tf.nn.dropout(flat, keep_prob=keep_prob)
        return self._selu_mlp(flat, keep_prob, 2048, config.freq_size)
    elif config.mode == "sfnn":
        keep_prob = self.keep_prob
        # Additive skip connection from the centre frame of the context window.
        skip_inputs = tf.squeeze(inputs[:, int(config.time_width / 2), :])
        flat = tf.reshape(tf.squeeze(inputs, [3]),
                          (-1, int(config.time_width * config.freq_size)))
        flat = tf.nn.dropout(flat, keep_prob=keep_prob)
        fm = self._selu_mlp(flat, keep_prob, 2048, config.freq_size)
        return fm + skip_inputs
    elif config.mode == "lstm":
        keep_prob = self.keep_prob  # NOTE: unused in this branch (kept from original)
        num_units = [1024, 1024]
        cells = [tf.nn.rnn_cell.LSTMCell(num_units=n, state_is_tuple=True)
                 for n in num_units]
        cell = tf.nn.rnn_cell.MultiRNNCell(cells=cells, state_is_tuple=True)
        cell = tf.contrib.rnn.OutputProjectionWrapper(cell, output_size=config.freq_size)
        outputs, _state = tf.nn.dynamic_rnn(cell, inputs, time_major=False,
                                            dtype=tf.float32)
        return tf.reshape(outputs, [-1, config.freq_size])
    elif config.mode == "tsn":
        conv_inputs = tf.squeeze(tf.transpose(inputs, [0, 2, 1, 3]), axis=3)
        keep_prob = self.keep_prob
        # NOTE(review): the original assigned skip_inputs twice; the first
        # (centre-frame slice) was dead and has been dropped.
        skip_inputs = tf.squeeze(inputs, axis=3)  # currently unused below
        flat = tf.reshape(tf.squeeze(inputs, [3]),
                          (-1, int(config.time_width * config.freq_size)))
        flat = tf.nn.dropout(flat, keep_prob=keep_prob)
        fm = self._selu_mlp(flat, keep_prob, 1024,
                            int(config.freq_size * config.time_width))
        fm = tf.reshape(fm, (-1, config.time_width, config.freq_size))
        # NOTE(review): the constant pad hard-codes batch dimension 1 —
        # this branch appears to assume batch_size == 1; confirm.
        pad = tf.zeros((1, config.freq_size * int(config.time_width / 2),
                        config.time_width, 1))
        conv_fm = tf.reshape(
            tf.transpose(tf.expand_dims(fm, axis=3), [0, 2, 1, 3]),
            (1, -1, config.time_width, 1))
        conv_fm = tf.concat([pad, conv_fm, pad], axis=1)
        conv_fm = utils.extract_patch(
            tf.squeeze(conv_fm),
            patch_size=(config.freq_size * config.time_width, config.time_width))
        conv_fm = tf.stack(
            tf.split(conv_fm, num_or_size_splits=config.time_width, axis=1), axis=3)
        conv_fm = tf.reshape(
            conv_fm, (-1, config.freq_size, config.time_width * config.time_width))
        # Concatenate the MLP's patch features with the raw spectrogram slices.
        conv_fm = tf.concat([conv_fm, conv_inputs], axis=2)
        conv_fm = tf.expand_dims(conv_fm, axis=2)
        conv_1 = utils.conv_with_bn_2(conv_fm, 256, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_1')
        conv_2 = utils.conv_with_bn_2(conv_1, 128, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_2')
        conv_3 = utils.conv_with_bn_2(conv_2, 64, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_3')
        conv_4 = utils.conv_with_bn_2(conv_3, 32, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_4')
        conv_5 = utils.conv_with_bn_2(conv_4, 32, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_5')
        conv_6 = utils.conv_with_bn_2(conv_5, 32, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_6')
        conv_7 = utils.conv_with_bn_2(conv_6, 32, filter_size=[5, 1], stride=1, act='relu',
                                      scale=True, is_training=self._is_training,
                                      padding="SAME", name='conv_7')
        conv_8 = utils.conv_with_bn_2(conv_7, 1, filter_size=[5, 1], stride=1, act='relu',
                                      scale=False, is_training=self._is_training,
                                      padding="SAME", name='conv_8')
        conv_9 = tf.squeeze(tf.squeeze(conv_8, axis=2), axis=2)
        return fm, conv_9
# --- Visual sanity check of the affine augmentation ---
# NOTE(review): img, label, img_tf and label_tf must be defined earlier in
# the script (outside this chunk); this block only displays them side by side.
plt.figure(figsize=(8, 8))
plt.subplot(221)
plt.title('image')
io.imshow(img)
plt.subplot(222)
plt.title('label')
io.imshow(label)
plt.subplot(223)
plt.title('affine')
io.imshow(img_tf)
plt.subplot(224)
plt.title('affine')
io.imshow(label_tf)

# Disabled debug block (``if 0``): re-runs the augmentation over the whole
# training set and plots sample 436. Kept as-is; flip to ``if 1`` to enable.
if 0:
    X_aft, Y_aft = affine_transform(X_train, Y_train)
    ix = 436
    img = X_train[ix]
    label = np.squeeze(Y_train[ix])
    img_tf = X_aft[ix]
    label_tf = np.squeeze(Y_aft[ix])
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.title('image')
    io.imshow(img)
    plt.subplot(222)
    plt.title('label')
    io.imshow(label)
    plt.subplot(223)
    plt.title('affine')
    io.imshow(img_tf)
def inference(self, reuse=None):
    """Unroll the recurrent attention (glimpse) network for nGlimpses steps.

    Seeds the attention with an all-zero selection window, then alternates
    core-LSTM steps with ``get_next_input`` (which appends baselines and
    window distributions as side effects). Returns the list of LSTM
    outputs, one per glimpse.
    """
    # initialization
    raw_inputs = self.inputs
    batch_size = self.batch_size
    keep_prob = self.keep_probability
    is_training = self.is_training
    tf.set_random_seed(SEED)  # initialize the random seed at graph level
    lstm_cell = rnn.LayerNormBasicLSTMCell(lstm_cell_size, dropout_keep_prob=keep_prob,
                                           reuse=reuse, dropout_prob_seed=SEED)
    # lstm_cell = rnn.BasicRNNCell(lstm_cell_size, reuse=reuse)
    initial_state = lstm_cell.zero_state(batch_size, tf.float32)
    # Start sign: an all-zero selection over the bdnn window.
    init_sw = tf.ones([batch_size, int(bdnn_winlen)]) * 0  # start sign
    self.mean_bps.append(init_sw)
    # Threshold at 0.4 — all zeros, so the initial hard selection is empty too.
    init_sw = tf.cast(tf.greater(init_sw, 0.4), tf.float32)
    self.sampled_bps.append(init_sw)
    reuse_recurrent = False
    init_glimpse = self.get_glimpse(
        raw_inputs, init_sw, reuse=reuse_recurrent)  # (batch_size, glimpse_out)
    inputs = [0] * nGlimpses
    outputs = [0] * nGlimpses
    glimpse = init_glimpse
    for time_step in range(nGlimpses):
        if time_step == 0:
            with tf.variable_scope("core_network", reuse=reuse_recurrent):
                (cell_output, cell_state) = lstm_cell(glimpse, initial_state)
                # NOTE(review): this appends initial_state, not cell_output —
                # possibly intentional (recording the start state), but it
                # looks like it may have been meant to be cell_output; confirm.
                self.cell_outputs.append(initial_state)
        else:
            # Reuse core-network variables for all subsequent steps.
            reuse_recurrent = True
            with tf.variable_scope("core_network", reuse=reuse_recurrent):
                (cell_output, cell_state) = lstm_cell(glimpse, cell_state)
        inputs[time_step] = glimpse
        outputs[time_step] = cell_output
        if time_step != nGlimpses - 1:  # not final time_step
            glimpse = self.get_next_input(cell_output, reuse=reuse_recurrent)
        else:  # final time_step
            # No next glimpse is needed; only record the final baseline.
            with tf.variable_scope("baseline", reuse=reuse_recurrent):
                baseline = tf.sigmoid(
                    utils.affine_transform(((cell_output)), 1, name='baseline'))
                self.baselines.append(baseline)
    return outputs
def calc_reward(self, outputs):
    """Build the hybrid supervised + REINFORCE training objective.

    From the final LSTM output: predict frame labels (supervised, squared
    error), compute a 0/1 reward against the centre-frame label, and add a
    REINFORCE term for the sampled attention windows with the learned
    baselines as variance reducers. Returns the training op plus
    monitoring tensors.
    """
    batch_size = self.batch_size
    # consider the action at the last time step
    outputs = outputs[-1]
    outputs = tf.reshape(outputs, (batch_size, lstm_cell_size))
    # get the baseline
    b = tf.stack(self.baselines)
    b = tf.tile(b, [1, 1, 1])
    b = tf.reshape(tf.transpose(b, [1, 0, 2]), [batch_size, nGlimpses])
    # Baseline must not receive policy gradients through this path.
    no_grad_b = tf.stop_gradient(b)
    # get the action
    action_out = self.action_network(outputs)
    logits = tf.sigmoid(
        utils.affine_transform(action_out, int(bdnn_outputsize), seed=SEED,
                               name="softmax"))
    result, soft_result = self.bdnn_prediction(logits, threshold=rf_threshold)
    # convert list of tensors to one big tensor: (batch, nGlimpses, winlen)
    mean_bps = tf.concat(axis=0, values=self.mean_bps)
    mean_bps = tf.reshape(mean_bps, (nGlimpses, self.batch_size, int(bdnn_winlen)))
    mean_bps = tf.transpose(mean_bps, [1, 0, 2])
    sampled_bps = tf.concat(axis=0, values=self.sampled_bps)
    sampled_bps = tf.reshape(
        sampled_bps, (nGlimpses, self.batch_size, int(bdnn_winlen)))
    sampled_bps = tf.transpose(sampled_bps, [1, 0, 2])
    # reward for all examples in the batch: 1 when the hard prediction
    # matches the centre-frame label, else 0.
    raw_indx = int(np.floor(bdnn_outputsize / 2))
    raw_labels = self.labels[:, raw_indx]
    raw_labels = tf.reshape(raw_labels, shape=(-1, 1))
    R = tf.cast(tf.equal(result, raw_labels), tf.float32)
    # Soft reward variant (computed but only used via the commented rf_part).
    soft_R = tf.stop_gradient(
        tf.cast(tf.abs(tf.subtract(1 - soft_result, raw_labels)), tf.float32))
    soft_R = tf.reshape(soft_R, (batch_size, 1))
    soft_R = tf.tile(soft_R, [1, nGlimpses])
    # R = tf.cast(tf.abs(tf.subtract(1 - soft_result, raw_labels)), tf.float32)
    R = tf.stop_gradient(R)
    R = tf.reshape(R, (batch_size, 1))
    self.raw_reward = R
    R = tf.tile(R, [1, nGlimpses])
    reward = tf.reduce_mean(R)
    # select the window: probability of the sampled windows under the policy
    p_bps = multinomial_pmf(mean_bps, sampled_bps)
    p_bps = tf.reshape(p_bps, (self.batch_size, nGlimpses))
    # define the cost function (J is maximized, so cost = -J)
    sv_part = -tf.square(self.labels - logits)
    # rf_part = tf.log(p_bps + SMALL_NUM) * (soft_R - no_grad_b)
    rf_part = tf.log(p_bps + SMALL_NUM) * (R - no_grad_b)
    # J = sv_part
    J = tf.concat(axis=1, values=[sv_part, rf_part])  # comment for sv only
    J = tf.reduce_sum(J, 1)
    # Baseline regression term: train b toward R.
    J = J - tf.reduce_mean(tf.square(R - b), 1)  # comment for sv only
    J = tf.reduce_mean(J, 0)
    # cost = -J
    cost = -tf.reduce_mean(J)
    # Clipped-gradient Adam step over all trainable variables.
    var_list = tf.trainable_variables()
    grads = tf.gradients(cost, var_list)
    grads, _ = tf.clip_by_global_norm(grads, clip_th)
    optimizer = tf.train.AdamOptimizer(self.lr)
    train_op = optimizer.apply_gradients(zip(grads, var_list),
                                         global_step=self.global_step)
    return cost, reward, train_op, tf.reduce_mean(b), tf.reduce_mean(R - b), \
        sampled_bps, tf.reduce_mean(p_bps), self.lr, soft_result, raw_labels
def inference(self, inputs):
    """Build the enhancement network selected by ``config.mode``
    ("fcn" fully-convolutional, or "fnn" SELU MLP) and return its output.

    FIX 1: the original compared strings with ``is`` (object identity),
    which relies on CPython literal interning and emits a SyntaxWarning on
    Python >= 3.8; ``==`` is the correct comparison.
    FIX 2: the "fcn" branch never returned, so that mode produced None;
    it now returns the squeezed feature map. An unrecognised mode still
    returns None, as before.
    """
    if config.mode == "fcn":
        fm = utils.conv_with_bn(inputs, out_channels=12, filter_size=[config.time_width, 13],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_1")
        fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_2")
        fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_3")
        fm_skip = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                     stride=1, act='relu', is_training=self._is_training,
                                     padding="SAME", name="conv_4")
        fm = utils.conv_with_bn(fm_skip, out_channels=32, filter_size=[config.time_width, 7],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_5")
        # Residual connection around the bottleneck (conv_5/conv_6).
        fm = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_6") + fm_skip
        fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_7")
        fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_8")
        fm = utils.conv_with_bn(fm, out_channels=12, filter_size=[config.time_width, 13],
                                stride=1, act='relu', is_training=self._is_training,
                                padding="SAME", name="conv_9")
        fm = utils.conv_with_bn(
            fm, out_channels=1, filter_size=[config.time_width, config.freq_size],
            stride=1, act='linear', is_training=self._is_training,
            padding="SAME", name="conv_10")
        # (batch_size, 1, config.freq_size, 1) -> (batch_size, config.freq_size)
        fm = tf.squeeze(fm, [1, 3])
        return fm
    elif config.mode == "fnn":
        keep_prob = self.keep_prob
        # Flatten the (time, freq) context window into one feature vector.
        flat = tf.reshape(tf.squeeze(inputs, [3]),
                          (-1, int(config.time_width * config.freq_size)))
        flat = tf.nn.dropout(flat, keep_prob=keep_prob)
        h1 = tf.nn.selu(utils.affine_transform(flat, 2048, name='hidden_1'))
        h1 = tf.nn.dropout(h1, keep_prob=keep_prob)
        h2 = tf.nn.selu(utils.affine_transform(h1, 2048, name='hidden_2'))
        h2 = tf.nn.dropout(h2, keep_prob=keep_prob)
        h3 = tf.nn.selu(utils.affine_transform(h2, 2048, name='hidden_3'))
        h3 = tf.nn.dropout(h3, keep_prob=keep_prob)
        fm = utils.affine_transform(h3, config.freq_size, name='logits')
        return fm