Example #1
import tensorflow as tf  # TF 1.x API (tf.layers, tf.variable_scope, tf.contrib)

# `win` (scattering filter factories), `scat2d`, and `layerO` are assumed to be
# helpers defined elsewhere in this project and already in scope here.


def wst_net_v1(x_dict, dropout, reuse, is_training, n_classes):
    """Network to follow scattering transform (ST) preprocessing.

    x should be (...)
    """

    psi = win.fst2d_psi_factory([7, 7], include_avg=False)

    layer_params = layerO((1, 1), 'valid')
    nfeat = 32

    with tf.variable_scope('wst_net_v1', reuse=reuse):
        x = x_dict['spectrograms']
        x = tf.expand_dims(x, -1)

        # ..., ...
        U1 = scat2d(x, psi, layer_params)

        # 7x1 then 1x7 convolutions, factorizing a 7x7 receptive field
        conv = tf.layers.conv2d(U1, nfeat, (7, 1), 1, activation=tf.nn.relu)
        conv = tf.layers.conv2d(conv, nfeat, (1, 7), 1, activation=tf.nn.relu)
        # 5x5 convolutions with strides 1, 2, 4, 8 to progressively downsample
        conv = tf.layers.conv2d(conv, nfeat, 5, 1, activation=tf.nn.relu)
        conv = tf.layers.conv2d(conv, nfeat, 5, 2, activation=tf.nn.relu)
        conv = tf.layers.conv2d(conv, nfeat, 5, 4, activation=tf.nn.relu)
        conv = tf.layers.conv2d(conv, nfeat, 5, 8, activation=tf.nn.relu)

        fc = tf.contrib.layers.flatten(conv)
        fc = tf.layers.dense(fc, 300)
        out = tf.layers.dense(fc, n_classes)
    # NOTE: squeezing axis 1 assumes n_classes == 1 (a single logit per
    # example); with n_classes > 1 the squeeze fails at graph-construction time.
    return tf.squeeze(out, axis=1)
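
A minimal usage sketch for wst_net_v1 (assuming the project helpers `win`, `scat2d`, and `layerO` are importable; the 128x128 spectrogram shape, dropout value, and n_classes=1 are illustrative choices, not taken from the original project):

import tensorflow as tf

# hypothetical batch of spectrograms, shape (batch, h, w)
spectrograms = tf.placeholder(tf.float32, [None, 128, 128], name='spectrograms')

# n_classes=1 so the final tf.squeeze(out, axis=1) yields a (batch,) logit vector
logits = wst_net_v1({'spectrograms': spectrograms}, dropout=0.5,
                    reuse=False, is_training=True, n_classes=1)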
Example #2
# `batch_size` is assumed to be a module-level constant in the original
# project; `win`, `scat2d`, `layerO`, and `tf` (TensorFlow 1.x) are the same
# project helpers and imports used in Example #1.
def scat2d_to_2d_2layer(x, reuse=tf.AUTO_REUSE, bs=batch_size):
    """
    Args:
        x: in (batch, h, w, 1) shape
    Returns:
        (batch, h, w, channels)
    """
    psis = [None, None]
    layer_params = [None, None, None]
    with tf.variable_scope('scat2d_to_2d_2layer', reuse=reuse):
        # TF Estimator input is a dict, in case of multiple inputs

        psis[0] = win.fst2d_psi_factory([7, 7], include_avg=False)
        layer_params[0] = layerO((1, 1), 'valid')

        # 107, 107
        U1 = scat2d(x, psis[0], layer_params[0])

        psis[1] = win.fst2d_psi_factory([7, 7], include_avg=False)
        layer_params[1] = layerO((1, 1), 'valid')

        U2s = []
        # only proceed with increasing-frequency paths
        for res_i, used_params in enumerate(psis[0].filter_params):
            increasing_psi = win.fst2d_psi_factory(psis[1].kernel_size,
                                                   used_params)
            if increasing_psi.nfilt > 0:
                U2s.append(
                    scat2d(U1[:, :, :, res_i:(res_i + 1)], increasing_psi,
                           layer_params[1]))

        # 101, 101
        U2 = tf.concat(U2s, 3)
        # swap to (batch, chanU2, h, w)
        U2 = tf.transpose(U2, [0, 3, 1, 2])
        # reshape to (batch*chanU2, h, w, 1); keep the old static shape in U2os
        U2os = U2.get_shape()
        U2 = tf.reshape(
            U2,
            (bs * U2.get_shape()[1], U2.get_shape()[2], U2.get_shape()[3], 1))

        # swap to (batch, chanU1, h, w)
        U1 = tf.transpose(U1, [0, 3, 1, 2])
        # reshape to (batch*chanU1, h, w, 1); keep the old static shape in U1os
        U1os = U1.get_shape()
        U1 = tf.reshape(
            U1,
            (bs * U1.get_shape()[1], U1.get_shape()[2], U1.get_shape()[3], 1))

        # now lo-pass

        # each layer is lo-passed after a different crop so that (h, w) align,
        # because we want to be able to do 2d convolutions afterwards again
        layer_params[2] = layerO((1, 1), 'valid')
        phi = win.fst2d_phi_factory([5, 5])

        # filter and separate by original batch via old shape
        S0 = scat2d(x[:, 6:-6, 6:-6, :], phi, layer_params[2])
        S0 = tf.reshape(S0, (bs, 1, S0.get_shape()[1], S0.get_shape()[2]))
        S1 = scat2d(U1[:, 3:-3, 3:-3, :], phi, layer_params[2])
        S1 = tf.reshape(S1,
                        (bs, U1os[1], S1.get_shape()[1], S1.get_shape()[2]))
        S2 = scat2d(U2, phi, layer_params[2])
        S2 = tf.reshape(S2,
                        (bs, U2os[1], S2.get_shape()[1], S2.get_shape()[2]))

        # (batch, chan, h,w)
        feat2d = tf.concat([S0, S1, S2], 1)

    return tf.transpose(feat2d, [0, 2, 3, 1])
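
A minimal usage sketch for scat2d_to_2d_2layer (the 113x113 patch size is an assumption inferred from the in-code size comments, where the two 7x7 'valid' scattering layers give 107x107 and then 101x101 maps; the explicit bs value is likewise illustrative):

import tensorflow as tf

bs = 32  # illustrative; the original module's global `batch_size` plays this role

# hypothetical single-channel input patches
x = tf.placeholder(tf.float32, [bs, 113, 113, 1], name='patches')

# (batch, h', w', channels) scattering feature map, ready for further 2D convolutions
feat2d = scat2d_to_2d_2layer(x, bs=bs)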