Exemplo n.º 1
0
 def hidden_forward(self, l):
     """Forward pass through hidden layer ``l``: affine transform, then activation.

     Stores the pre-activation in ``self.z[l]`` and the activated output in
     ``self.a[l]``; the previous layer's activations ``self.a[l - 1]`` are the input.
     """
     weights, bias = self.get_w_b(l)
     pre_activation = affine_transform(weights, self.a[l - 1], bias)
     self.z[l] = pre_activation
     if self.activation == RELU:
         self.a[l] = np.maximum(0, pre_activation)
     elif self.activation == TANH:
         self.a[l] = np.tanh(pre_activation)
Exemplo n.º 2
0
    def __getitem__(self, idx):
        """Assemble one temporal clip of cropped inputs, heatmap targets and metadata.

        For each of the ``self.temporal`` frames: read the image, derive a crop
        center/scale from the joints' bounding box, warp the image to
        ``self.image_size``, map the joints through the same transform and
        render them as heatmap targets.

        Fixes vs. original: the deprecated ``np.float`` alias (removed in
        NumPy 1.24) is replaced with ``np.float64`` (same dtype), a redundant
        second ``np.array(c, ...)`` conversion is dropped, and the builtin
        name ``input`` is no longer shadowed.

        Args:
            idx: index of the temporal clip.

        Returns:
            ``(inputs, targets, target_weights)`` when ``self.train``;
            otherwise additionally ``(cs, ss, joints_true, data_numpy, paths)``
            — same tuple layout as before.
        """
        inputs = torch.zeros((self.temporal, 1, 256, 256))
        targets = torch.zeros((self.temporal, 13, 64, 64))
        target_weights = torch.zeros((self.temporal, 13, 3))
        cs = torch.zeros(self.temporal, 2)
        ss = torch.zeros(self.temporal, 2)
        joints_true_out = torch.zeros(self.temporal, 13, 3)

        for k in range(self.temporal):
            img_path = self.temporal_dir[idx][k]
            joint = np.array(self.labels[idx][k])
            joint_true = np.array(self.joint_true[idx][k])
            data_numpy = mpimg.imread(img_path)

            u = joint[:, 0]
            v = joint[:, 1]
            # Crop center = center of the joints' bounding box.
            c = np.array([(max(u) + min(u)) / 2, (max(v) + min(v)) / 2],
                         dtype=np.float64)
            # Scale derived from the joints' vertical extent (magic factor
            # 0.0065 — presumably tuned for a 200px person height; TODO confirm).
            s = (max(v) - min(v)) * 0.0065
            s = np.array([s, s], dtype=np.float64)
            if c[0] != -1:
                # Shift the crop center down and enlarge the crop slightly.
                c[1] = c[1] + 15 * s[1]
                s = s * 1.25
            c = c - 1
            r = 0  # no rotation augmentation
            cs[k] = torch.from_numpy(c)
            ss[k] = torch.from_numpy(s)
            joints_true_out[k] = torch.from_numpy(joint_true)
            trans = get_affine_transform(c, s, r, self.image_size)
            warped = cv2.warpAffine(
                data_numpy,
                trans, (int(self.image_size[0]), int(self.image_size[1])),
                flags=cv2.INTER_LINEAR)
            # Map joint coordinates through the same crop transform.
            for i in range(13):
                joint[i, 0:2] = affine_transform(joint[i, 0:2], trans)
            # NOTE(review): ``joint`` is passed both as the joints and as the
            # visibility array — looks intentional for this dataset but
            # TODO confirm against generate_target's signature.
            target, target_weight = self.generate_target(joint, joint)
            inputs[k, 0] = torch.from_numpy(warped)  # single-channel image
            targets[k] = torch.from_numpy(target)
            target_weights[k] = torch.from_numpy(target_weight)
        if self.train:
            return inputs, targets, target_weights
        return (inputs, targets, target_weights, cs, ss, joints_true_out,
                data_numpy, self.temporal_dir[idx])
Exemplo n.º 3
0
    def predict(self, image: np.ndarray) -> np.ndarray:
        """Run detection on a single image and map boxes back to image coordinates.

        Fixes vs. original: the return annotation said ``list`` but an
        ``np.ndarray`` is what is actually returned (array indexing, astype and
        np.clip all operate on arrays); the builtin name ``input`` is no
        longer shadowed.

        Args:
            image: source image array, shape (H, W, C).

        Returns:
            Detections with score above ``self.score_threshold``; columns 0-3
            are box coordinates in source-image space, column 4 the score.
        """
        src_image = image.copy()
        # Center/scale describing a square region covering the whole image.
        c = np.array([image.shape[1] / 2., image.shape[0] / 2.],
                     dtype=np.float32)
        s = max(image.shape[0], image.shape[1]) * 1.0
        tgt_w = 512
        tgt_h = 512
        image = preprocess_image(image, c, s, tgt_w=tgt_w, tgt_h=tgt_h)
        if self.flip_test:
            # Test-time augmentation: batch the image with its horizontal flip.
            flipped_image = image[:, ::-1]
            batch = np.stack([image, flipped_image], axis=0)
        else:
            batch = np.expand_dims(image, axis=0)

        with self.sess.graph.as_default():
            predictions = self.prediction_model.predict_on_batch(batch)[0]

        # Keep only detections scoring above the threshold.
        scores = predictions[:, 4]
        indices = np.where(scores > self.score_threshold)[0]
        predictions = predictions[indices]
        predictions = predictions.astype(np.float64)

        # Inverse transform from the (4x-downsampled) output grid back to
        # source-image coordinates.
        trans = get_affine_transform(c, s, (tgt_w // 4, tgt_h // 4), inv=1)
        for j in range(predictions.shape[0]):
            predictions[j, 0:2] = affine_transform(predictions[j, 0:2], trans)
            predictions[j, 2:4] = affine_transform(predictions[j, 2:4], trans)

        # Clip boxes to the source image bounds.
        predictions[:, [0, 2]] = np.clip(predictions[:, [0, 2]], 0,
                                         src_image.shape[1])
        predictions[:, [1, 3]] = np.clip(predictions[:, [1, 3]], 0,
                                         src_image.shape[0])

        return predictions
Exemplo n.º 4
0
def chebys_tracer(coef_ord_combos, time_points, zipped: bool = False):
    """Generate pretraining control samples tracing Chebyshev polynomials.

    Each (coefficient, order) combination yields a polynomial which is
    evaluated at every point in ``time_points`` and affinely mapped from
    [-1, 1] into [0 + safeguard, 5 - safeguard].

    Args:
        coef_ord_combos: combinations fed to ``chebys_generator``.
        time_points: evaluation abscissae.
        zipped: if True, transpose the controls (one tuple per time point).

    Returns:
        ``(coef_ord_tuples, controls)``.
    """
    safeguard = 0.1
    coef_ord_tuples = []
    controls = []
    for combo, poly in chebys_generator(coef_ord_combos):
        coef_ord_tuples.append(combo)
        controls.append([
            affine_transform(poly(t), -1, 1, 0 + safeguard, 5 - safeguard)
            for t in time_points
        ])

    if zipped:
        return coef_ord_tuples, list(zip(*controls))
    return coef_ord_tuples, controls
Exemplo n.º 5
0
    def time_attention(self, inputs, reuse=None):
        """Build temporal attention weights over the input sequence.

        A stacked bidirectional LSTM reads ``inputs``; the concatenated
        forward/backward outputs are projected down to one scalar per time
        step and squashed with a sigmoid, yielding weights in (0, 1).

        Args:
            inputs: batched input sequence tensor.
            reuse: variable-scope / cell reuse flag so the graph can be
                rebuilt with shared weights.

        Returns:
            Sigmoid attention tensor with a trailing singleton axis
            (``data_step`` values per example) — presumably multiplied
            element-wise with the inputs by the caller; TODO confirm.
        """
        with tf.variable_scope("attention", reuse=reuse):
            # One LSTM cell per layer (all the same size) for each direction.
            cell_fw = [tf.contrib.rnn.LSTMCell(size, initializer=tf.contrib.layers.xavier_initializer(),
                                               reuse=reuse) for size in [self.num_cell_time]*self.num_layers]

            cell_bw = [tf.contrib.rnn.LSTMCell(size, initializer=tf.contrib.layers.xavier_initializer(),
                                               reuse=reuse) for size in [self.num_cell_time]*self.num_layers]

            cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells=cell_fw, state_is_tuple=True)
            cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells=cell_bw, state_is_tuple=True)

            # [0] keeps only the (output_fw, output_bw) pair; final states dropped.
            cell_out_list = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                            sequence_length=self.length(inputs), dtype=tf.float32)[0]
            # Concatenate both directions, then flatten time into the feature axis.
            cell_output = tf.reshape(tf.concat([cell_out_list[0], cell_out_list[1]], 2), shape=[-1,
                                                                                                data_step*self.num_cell_time*2])

            # Project to one value per time step and squash into (0, 1).
            t_attention = tf.expand_dims(tf.sigmoid(utils.affine_transform(cell_output, data_step,
                                                                           seed=0, name='sigmoid')), axis=2)
            return t_attention
Exemplo n.º 6
0
    def inference(self, inputs, reuse=None):
        """Compute class logits for the input sequence.

        A stacked bidirectional LSTM reads ``inputs``; the last relevant
        output of each direction (per the true sequence length) is
        concatenated and linearly projected to ``self.num_class`` logits.

        Args:
            inputs: batched input sequence tensor.
            reuse: variable-scope / cell reuse flag for weight sharing.

        Returns:
            Unnormalized logits tensor of width ``self.num_class``.
        """

        with tf.variable_scope("classification", reuse=reuse):
            # One LSTM cell per layer (all the same size) for each direction.
            cell_fw = [tf.contrib.rnn.LSTMCell(size, initializer=tf.contrib.layers.xavier_initializer(),
                                               reuse=reuse) for size in [self.num_cell]*self.num_layers]

            cell_bw = [tf.contrib.rnn.LSTMCell(size, initializer=tf.contrib.layers.xavier_initializer(),
                                               reuse=reuse) for size in [self.num_cell]*self.num_layers]

            cell_fw = tf.nn.rnn_cell.MultiRNNCell(cells=cell_fw, state_is_tuple=True)
            cell_bw = tf.nn.rnn_cell.MultiRNNCell(cells=cell_bw, state_is_tuple=True)

            # [0] keeps only the (output_fw, output_bw) pair; final states dropped.
            cell_out_list = tf.nn.bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs,
                                                            sequence_length=self.length(inputs), dtype=tf.float32)[0]
            # The backward outputs are time-reversed first so "last relevant"
            # indexes the same logical time step for both directions.
            cell_out_fw = self.last_relevant(cell_out_list[0], self.length(inputs))
            cell_out_bw = self.last_relevant(tf.reverse(cell_out_list[1], axis=[1]), self.length(inputs))

            cell_output = tf.concat([cell_out_fw, cell_out_bw], 1)
            logits = utils.affine_transform(cell_output, self.num_class, seed=0, name='softmax_logits')
            return logits
Exemplo n.º 7
0
    def get_next_input(self, cell_output, reuse=None):
        """From the current core output, choose the next attention glimpse.

        Emits a baseline value (for REINFORCE variance reduction) and a
        selection distribution over window positions; during training a
        one-hot window is sampled (multinomial), otherwise the mean
        distribution is used directly. The chosen distribution drives the
        next glimpse extraction.

        Args:
            cell_output: output tensor of the core recurrent network.
            reuse: variable-scope reuse flag.

        Returns:
            The next glimpse tensor from ``self.get_glimpse``.
        """

        raw_inputs = self.inputs
        is_training = self.is_training

        with tf.variable_scope("baseline", reuse=reuse):

            # Scalar baseline in (0, 1); collected for the REINFORCE loss.
            baseline = tf.sigmoid(
                utils.affine_transform((((cell_output))), 1, name='baseline'))

            self.baselines.append(baseline)

        with tf.variable_scope("selection_network", reuse=reuse):

            # Probability distribution over the bdnn window positions.
            mean_bp = smooth_softmax(
                utils.batch_norm_affine_transform((cell_output),
                                                  int(bdnn_winlen),
                                                  decay=decay,
                                                  name='selection',
                                                  is_training=is_training))
            # mean_bp = softmax(
            #     utils.batch_norm_affine_transform(cell_output, int(bdnn_winlen), decay=decay, name='selection',
            #                                       is_training=is_training), beta)

            self.mean_bps.append(mean_bp)

            # rand_seq = tf.random_uniform(mean_bp.get_shape().as_list(), minval=0, maxval=1, seed=SEED)

            # NOTE(review): ``is_training`` is used as a Python bool, so this
            # branch is decided at graph-build time — confirm it is not a
            # placeholder tensor.
            if is_training:
                sampled_bp = tf.multinomial(mean_bp, num_samples=1, seed=SEED)
                sampled_bp = utils.onehot_tensor(sampled_bp, bdnn_winlen)
            else:
                sampled_bp = mean_bp

            # No gradients flow through the sampling step (handled by REINFORCE).
            sampled_bp = tf.stop_gradient(sampled_bp)

            self.sampled_bps.append(sampled_bp)

        return self.get_glimpse(raw_inputs, mean_bp, reuse=True)
Exemplo n.º 8
0
 def last_forward(self):
     """Compute the output layer: affine transform of the last hidden
     activations followed by softmax. Stores and returns ``self.a[self.L]``.
     """
     weights, bias = self.get_w_b(self.L)
     logits = affine_transform(weights, self.a[self.L - 1], bias)
     self.z[self.L] = logits
     self.a[self.L] = softmax(logits)
     return self.a[self.L]
Exemplo n.º 9
0
    def inference(self, inputs):
        """Build the estimation graph for the mode selected by ``config.mode``.

        Fix: string modes are compared with ``==`` instead of ``is`` —
        identity comparison of strings only works by CPython interning
        accident and raises a SyntaxWarning on Python 3.8+. Also removes two
        dead ``skip_inputs`` assignments in the "tsn" branch (the variable
        was never used there) and long commented-out experiment code.

        Args:
            inputs: feature tensor, presumably shaped
                (batch, time_width, freq_size, 1) — TODO confirm.

        Returns:
            "fcn"/"fnn"/"irm"/"sfnn"/"lstm": the estimated feature map tensor.
            "tsn": a tuple ``(fm, conv_9)``.
            NOTE(review): any other mode raises ``NameError`` (``fm`` unbound);
            consider an explicit error.
        """
        if config.mode == "fcn":
            # Hourglass-style stack of conv+BN layers with one residual skip.
            fm = utils.conv_with_bn(inputs, out_channels=12, filter_size=[config.time_width, 13],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_1")

            fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_2")

            fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_3")

            fm_skip = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                         stride=1, act='relu', is_training=self._is_training,
                                         padding="SAME", name="conv_4")

            fm = utils.conv_with_bn(fm_skip, out_channels=32, filter_size=[config.time_width, 7],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_5")

            # Residual connection back to conv_4's output.
            fm = utils.conv_with_bn(fm, out_channels=24, filter_size=[config.time_width, 7],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_6") + fm_skip

            fm = utils.conv_with_bn(fm, out_channels=20, filter_size=[config.time_width, 9],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_7")

            fm = utils.conv_with_bn(fm, out_channels=16, filter_size=[config.time_width, 11],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_8")

            fm = utils.conv_with_bn(fm, out_channels=12, filter_size=[config.time_width, 13],
                                    stride=1, act='relu', is_training=self._is_training,
                                    padding="SAME", name="conv_9")

            fm = utils.conv_with_bn(fm, out_channels=1, filter_size=[config.time_width, config.freq_size],
                                    stride=1, act='linear', is_training=self._is_training,
                                    padding="SAME", name="conv_10")  # (batch_size, 1, config.freq_size, 1)

            fm = tf.squeeze(fm, [1, 3])

            return fm

        elif config.mode == "fnn":

            keep_prob = self.keep_prob

            # Flatten the (time, freq) patch into one feature vector.
            inputs = tf.reshape(tf.squeeze(inputs, [3]), (-1, int(config.time_width*config.freq_size)))
            inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)

            h1 = tf.nn.selu(utils.affine_transform(inputs, 2048, name='hidden_1'))
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)

            h2 = tf.nn.selu(utils.affine_transform(h1, 2048, name='hidden_2'))
            h2 = tf.nn.dropout(h2, keep_prob=keep_prob)

            h3 = tf.nn.selu(utils.affine_transform(h2, 2048, name='hidden_3'))
            h3 = tf.nn.dropout(h3, keep_prob=keep_prob)

            fm = utils.affine_transform(h3, config.freq_size, name='logits')

            return fm

        elif config.mode == "irm":

            # Identical architecture to "fnn"; kept separate so the two
            # modes can diverge (e.g. different targets) independently.
            keep_prob = self.keep_prob

            inputs = tf.reshape(tf.squeeze(inputs, [3]), (-1, int(config.time_width*config.freq_size)))
            inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)

            h1 = tf.nn.selu(utils.affine_transform(inputs, 2048, name='hidden_1'))
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)

            h2 = tf.nn.selu(utils.affine_transform(h1, 2048, name='hidden_2'))
            h2 = tf.nn.dropout(h2, keep_prob=keep_prob)

            h3 = tf.nn.selu(utils.affine_transform(h2, 2048, name='hidden_3'))
            h3 = tf.nn.dropout(h3, keep_prob=keep_prob)

            fm = utils.affine_transform(h3, config.freq_size, name='logits')

            return fm

        elif config.mode == "sfnn":

            keep_prob = self.keep_prob
            # Center frame of the input window, added back as a residual below.
            skip_inputs = tf.squeeze(inputs[:, int(config.time_width/2), :])
            inputs = tf.reshape(tf.squeeze(inputs, [3]), (-1, int(config.time_width*config.freq_size)))
            inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)

            h1 = tf.nn.selu(utils.affine_transform(inputs, 2048, name='hidden_1'))
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)

            h2 = tf.nn.selu(utils.affine_transform(h1, 2048, name='hidden_2'))
            h2 = tf.nn.dropout(h2, keep_prob=keep_prob)

            h3 = tf.nn.selu(utils.affine_transform(h2, 2048, name='hidden_3'))
            h3 = tf.nn.dropout(h3, keep_prob=keep_prob)

            fm = utils.affine_transform(h3, config.freq_size, name='logits')
            fm = fm + skip_inputs

            return fm

        elif config.mode == "lstm":

            keep_prob = self.keep_prob  # unused here; kept for parity with other modes

            num_units = [1024, 1024]
            cells = [tf.nn.rnn_cell.LSTMCell(num_units=n, state_is_tuple=True) for n in num_units]

            cell = tf.nn.rnn_cell.MultiRNNCell(cells=cells, state_is_tuple=True)
            # Project every LSTM output down to freq_size.
            cell = tf.contrib.rnn.OutputProjectionWrapper(cell, output_size=config.freq_size)
            outputs, _state = tf.nn.dynamic_rnn(cell, inputs, time_major=False, dtype=tf.float32)
            fm = tf.reshape(outputs, [-1, config.freq_size])

            return fm

        elif config.mode == "tsn":
            # Frequency-major view of the raw inputs, concatenated to the
            # re-windowed FNN output below.
            conv_inputs = tf.squeeze(tf.transpose(inputs, [0, 2, 1, 3]), axis=3)

            keep_prob = self.keep_prob

            inputs = tf.reshape(tf.squeeze(inputs, [3]), (-1, int(config.time_width * config.freq_size)))
            inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)

            h1 = tf.nn.selu(utils.affine_transform(inputs, 1024, name='hidden_1'))
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)

            h2 = tf.nn.selu(utils.affine_transform(h1, 1024, name='hidden_2'))
            h2 = tf.nn.dropout(h2, keep_prob=keep_prob)

            h3 = tf.nn.selu(utils.affine_transform(h2, 1024, name='hidden_3'))
            h3 = tf.nn.dropout(h3, keep_prob=keep_prob)

            # FNN stage emits a full (time_width, freq_size) patch per sample.
            fm = utils.affine_transform(h3, int(config.freq_size * config.time_width), name='logits')
            fm = tf.reshape(fm, (-1, config.time_width, config.freq_size))

            # Zero-pad and re-window the FNN output into overlapping patches
            # for the convolutional refinement stage.
            pad = tf.zeros((1, config.freq_size * int(config.time_width / 2), config.time_width, 1))
            conv_fm = tf.reshape(tf.transpose(tf.expand_dims(fm, axis=3), [0, 2, 1, 3]),
                                 (1, -1, config.time_width, 1))
            conv_fm = tf.concat([pad, conv_fm, pad], axis=1)
            conv_fm = utils.extract_patch(tf.squeeze(conv_fm),
                                          patch_size=(config.freq_size * config.time_width, config.time_width))
            conv_fm = tf.stack(tf.split(conv_fm, num_or_size_splits=config.time_width, axis=1), axis=3)
            conv_fm = tf.reshape(conv_fm, (-1, config.freq_size, config.time_width * config.time_width))

            conv_fm = tf.concat([conv_fm, conv_inputs], axis=2)

            conv_fm = tf.expand_dims(conv_fm, axis=2)

            # Conv refinement funnel: 256 -> 128 -> 64 -> 32 ... -> 1 channel.
            conv_1 = utils.conv_with_bn_2(conv_fm, 256, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_1')

            conv_2 = utils.conv_with_bn_2(conv_1, 128, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_2')

            conv_3 = utils.conv_with_bn_2(conv_2, 64, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_3')

            conv_4 = utils.conv_with_bn_2(conv_3, 32, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_4')

            conv_5 = utils.conv_with_bn_2(conv_4, 32, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_5')

            conv_6 = utils.conv_with_bn_2(conv_5, 32, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_6')
            conv_7 = utils.conv_with_bn_2(conv_6, 32, filter_size=[5, 1], stride=1, act='relu', scale=True,
                                          is_training=self._is_training, padding="SAME", name='conv_7')
            conv_8 = utils.conv_with_bn_2(conv_7, 1, filter_size=[5, 1], stride=1, act='relu', scale=False,
                                          is_training=self._is_training, padding="SAME", name='conv_8')
            conv_9 = tf.squeeze(tf.squeeze(conv_8, axis=2), axis=2)

        return fm, conv_9
Exemplo n.º 10
0
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.title('image')
    io.imshow(img)
    plt.subplot(222)
    plt.title('label')
    io.imshow(label)
    plt.subplot(223)
    plt.title('affine')
    io.imshow(img_tf)
    plt.subplot(224)
    plt.title('affine')
    io.imshow(label_tf)

# Disabled visualization block: apply the affine augmentation to the whole
# training set and show one example image/label pair before and after.
# Flip the 0 to 1 to run it (relies on X_train/Y_train/plt/io being in scope).
if 0:
    X_aft, Y_aft = affine_transform(X_train, Y_train)
    ix = 436  # arbitrary sample index to visualize
    img = X_train[ix]
    label = np.squeeze(Y_train[ix])
    img_tf = X_aft[ix]
    label_tf = np.squeeze(Y_aft[ix])
    plt.figure(figsize=(8, 8))
    plt.subplot(221)
    plt.title('image')
    io.imshow(img)
    plt.subplot(222)
    plt.title('label')
    io.imshow(label)
    plt.subplot(223)
    plt.title('affine')
    io.imshow(img_tf)
Exemplo n.º 11
0
    def inference(self, reuse=None):
        """Unroll the recurrent attention (glimpse) network for ``nGlimpses`` steps.

        Starts from an all-zero selection window, repeatedly feeds the current
        glimpse into a layer-normalized LSTM core, and asks the selection
        network (``get_next_input``) for the next window; the final step also
        emits a baseline value for the REINFORCE loss.

        Args:
            reuse: variable-scope reuse flag for weight sharing.

        Returns:
            List of the core LSTM outputs, one per glimpse.
        """

        # initialization
        raw_inputs = self.inputs
        batch_size = self.batch_size
        keep_prob = self.keep_probability
        is_training = self.is_training

        tf.set_random_seed(SEED)  # initialize the random seed at graph level

        lstm_cell = rnn.LayerNormBasicLSTMCell(lstm_cell_size,
                                               dropout_keep_prob=keep_prob,
                                               reuse=reuse,
                                               dropout_prob_seed=SEED)
        # lstm_cell = rnn.BasicRNNCell(lstm_cell_size, reuse=reuse)

        initial_state = lstm_cell.zero_state(batch_size, tf.float32)

        init_sw = tf.ones([batch_size, int(bdnn_winlen)]) * 0  # start sign

        self.mean_bps.append(init_sw)

        # All zeros fail the > 0.4 test, so the start window is all zeros too.
        init_sw = tf.cast(tf.greater(init_sw, 0.4), tf.float32)
        self.sampled_bps.append(init_sw)

        reuse_recurrent = False

        init_glimpse = self.get_glimpse(
            raw_inputs, init_sw,
            reuse=reuse_recurrent)  # (batch_size, glimpse_out)

        inputs = [0] * nGlimpses
        outputs = [0] * nGlimpses
        glimpse = init_glimpse

        for time_step in range(nGlimpses):

            if time_step == 0:
                with tf.variable_scope("core_network", reuse=reuse_recurrent):
                    (cell_output,
                     cell_state) = lstm_cell(glimpse, initial_state)
                    # NOTE(review): appends the *state*, not cell_output —
                    # looks suspicious; confirm this is intended.
                    self.cell_outputs.append(initial_state)
            else:
                # Reuse the core weights for all subsequent steps.
                reuse_recurrent = True
                with tf.variable_scope("core_network", reuse=reuse_recurrent):
                    (cell_output, cell_state) = lstm_cell(glimpse, cell_state)

            inputs[time_step] = glimpse
            outputs[time_step] = cell_output

            if time_step != nGlimpses - 1:  # not final time_step

                glimpse = self.get_next_input(cell_output,
                                              reuse=reuse_recurrent)

            else:  # final time_step
                with tf.variable_scope("baseline", reuse=reuse_recurrent):

                    # Baseline for the last step (earlier steps' baselines are
                    # appended inside get_next_input).
                    baseline = tf.sigmoid(
                        utils.affine_transform(((cell_output)),
                                               1,
                                               name='baseline'))

                    self.baselines.append(baseline)

        return outputs
Exemplo n.º 12
0
    def calc_reward(self, outputs):
        """Build the REINFORCE loss, reward and train op from the unrolled outputs.

        Uses the last time step's core output to predict labels, compares the
        thresholded prediction against ground truth to get a 0/1 reward, and
        combines a supervised squared-error term with a baseline-subtracted
        REINFORCE term into one cost minimized with clipped-gradient Adam.

        Args:
            outputs: list of per-glimpse core-network outputs.

        Returns:
            Tuple of (cost, reward, train_op, mean baseline, mean advantage,
            sampled_bps, mean selection probability, learning rate,
            soft predictions, raw labels).
        """

        batch_size = self.batch_size

        # consider the action at the last time step

        outputs = outputs[-1]
        outputs = tf.reshape(outputs, (batch_size, lstm_cell_size))

        # get the baseline (one per glimpse, reshaped to (batch, nGlimpses))

        b = tf.stack(self.baselines)
        b = tf.tile(b, [1, 1, 1])
        b = tf.reshape(tf.transpose(b, [1, 0, 2]), [batch_size, nGlimpses])
        no_grad_b = tf.stop_gradient(b)

        # get the action

        action_out = self.action_network(outputs)
        logits = tf.sigmoid(
            utils.affine_transform(action_out,
                                   int(bdnn_outputsize),
                                   seed=SEED,
                                   name="softmax"))
        result, soft_result = self.bdnn_prediction(logits,
                                                   threshold=rf_threshold)

        # convert list of tensors to one big tensor

        mean_bps = tf.concat(axis=0, values=self.mean_bps)
        mean_bps = tf.reshape(mean_bps,
                              (nGlimpses, self.batch_size, int(bdnn_winlen)))
        mean_bps = tf.transpose(mean_bps, [1, 0, 2])

        sampled_bps = tf.concat(axis=0, values=self.sampled_bps)
        sampled_bps = tf.reshape(
            sampled_bps, (nGlimpses, self.batch_size, int(bdnn_winlen)))
        sampled_bps = tf.transpose(sampled_bps, [1, 0, 2])

        # reward for all examples in the batch

        raw_indx = int(np.floor(bdnn_outputsize / 2))  # center frame of the output window
        raw_labels = self.labels[:, raw_indx]
        raw_labels = tf.reshape(raw_labels, shape=(-1, 1))
        # Hard reward: 1 where the thresholded prediction matches the label.
        R = tf.cast(tf.equal(result, raw_labels), tf.float32)
        # Soft reward variant — computed but unused in the cost below.
        soft_R = tf.stop_gradient(
            tf.cast(tf.abs(tf.subtract(1 - soft_result, raw_labels)),
                    tf.float32))
        soft_R = tf.reshape(soft_R, (batch_size, 1))
        soft_R = tf.tile(soft_R, [1, nGlimpses])

        # R = tf.cast(tf.abs(tf.subtract(1 - soft_result, raw_labels)), tf.float32)
        R = tf.stop_gradient(R)
        R = tf.reshape(R, (batch_size, 1))
        self.raw_reward = R
        R = tf.tile(R, [1, nGlimpses])
        reward = tf.reduce_mean(R)

        # select the window

        # Likelihood of the sampled windows under the mean distributions.
        p_bps = multinomial_pmf(mean_bps, sampled_bps)
        p_bps = tf.reshape(p_bps, (self.batch_size, nGlimpses))

        # define the cost function
        sv_part = -tf.square(self.labels - logits)  # supervised squared-error term
        # rf_part = tf.log(p_bps + SMALL_NUM) * (soft_R - no_grad_b)
        rf_part = tf.log(p_bps + SMALL_NUM) * (R - no_grad_b)  # REINFORCE term, baseline-subtracted
        # J = sv_part

        J = tf.concat(axis=1, values=[sv_part, rf_part])  # comment for sv only
        J = tf.reduce_sum(J, 1)
        J = J - tf.reduce_mean(tf.square(R - b), 1)  # comment for sv only
        J = tf.reduce_mean(J, 0)

        # cost = -J

        cost = -tf.reduce_mean(J)
        var_list = tf.trainable_variables()
        grads = tf.gradients(cost, var_list)
        grads, _ = tf.clip_by_global_norm(grads, clip_th)  # gradient clipping for stability
        optimizer = tf.train.AdamOptimizer(self.lr)
        train_op = optimizer.apply_gradients(zip(grads, var_list),
                                             global_step=self.global_step)

        return cost, reward, train_op, tf.reduce_mean(b), tf.reduce_mean(R - b), \
               sampled_bps, tf.reduce_mean(p_bps), self.lr, soft_result, raw_labels
Exemplo n.º 13
0
    def inference(self, inputs):
        """Build the estimation graph ("fcn" or "fnn", per ``config.mode``).

        Fix: modes are compared with ``==`` instead of ``is`` — identity
        comparison of strings relies on CPython interning and is a
        SyntaxWarning on Python 3.8+. Stale commented-out experiment code
        is removed.

        Args:
            inputs: feature tensor, presumably shaped
                (batch, time_width, freq_size, 1) — TODO confirm.

        Returns:
            The estimated feature map tensor. NOTE(review): if ``config.mode``
            is neither "fcn" nor "fnn", ``fm`` is unbound and this raises
            ``NameError``; consider an explicit error.
        """
        if config.mode == "fcn":
            # Hourglass-style stack of conv+BN layers with one residual skip.
            fm = utils.conv_with_bn(inputs,
                                    out_channels=12,
                                    filter_size=[config.time_width, 13],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_1")

            fm = utils.conv_with_bn(fm,
                                    out_channels=16,
                                    filter_size=[config.time_width, 11],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_2")

            fm = utils.conv_with_bn(fm,
                                    out_channels=20,
                                    filter_size=[config.time_width, 9],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_3")

            fm_skip = utils.conv_with_bn(fm,
                                         out_channels=24,
                                         filter_size=[config.time_width, 7],
                                         stride=1,
                                         act='relu',
                                         is_training=self._is_training,
                                         padding="SAME",
                                         name="conv_4")

            fm = utils.conv_with_bn(fm_skip,
                                    out_channels=32,
                                    filter_size=[config.time_width, 7],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_5")

            # Residual connection back to conv_4's output.
            fm = utils.conv_with_bn(fm,
                                    out_channels=24,
                                    filter_size=[config.time_width, 7],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_6") + fm_skip

            fm = utils.conv_with_bn(fm,
                                    out_channels=20,
                                    filter_size=[config.time_width, 9],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_7")

            fm = utils.conv_with_bn(fm,
                                    out_channels=16,
                                    filter_size=[config.time_width, 11],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_8")

            fm = utils.conv_with_bn(fm,
                                    out_channels=12,
                                    filter_size=[config.time_width, 13],
                                    stride=1,
                                    act='relu',
                                    is_training=self._is_training,
                                    padding="SAME",
                                    name="conv_9")

            fm = utils.conv_with_bn(
                fm,
                out_channels=1,
                filter_size=[config.time_width, config.freq_size],
                stride=1,
                act='linear',
                is_training=self._is_training,
                padding="SAME",
                name="conv_10")  # (batch_size, 1, config.freq_size, 1)

            fm = tf.squeeze(fm, [1, 3])

        elif config.mode == "fnn":

            keep_prob = self.keep_prob

            # Flatten the (time, freq) patch into one feature vector.
            inputs = tf.reshape(tf.squeeze(
                inputs, [3]), (-1, int(config.time_width * config.freq_size)))
            inputs = tf.nn.dropout(inputs, keep_prob=keep_prob)

            h1 = tf.nn.selu(
                utils.affine_transform(inputs, 2048, name='hidden_1'))
            h1 = tf.nn.dropout(h1, keep_prob=keep_prob)

            h2 = tf.nn.selu(utils.affine_transform(h1, 2048, name='hidden_2'))
            h2 = tf.nn.dropout(h2, keep_prob=keep_prob)

            h3 = tf.nn.selu(utils.affine_transform(h2, 2048, name='hidden_3'))
            h3 = tf.nn.dropout(h3, keep_prob=keep_prob)

            fm = utils.affine_transform(h3, config.freq_size, name='logits')

        return fm