def wst_net_v1(x_dict, dropout, reuse, is_training, n_classes):
    """Network to follow ST preprocessing. x should be (...) """
    # First-layer scattering filter bank and its conv parameters.
    psi = win.fst2d_psi_factory([7, 7], include_avg=False)
    layer_params = layerO((1, 1), 'valid')
    nfeat = 32
    # NOTE(review): `dropout` and `is_training` are accepted but unused here.

    with tf.variable_scope('wst_net_v1', reuse=reuse):
        # assumes x_dict['spectrograms'] is rank-3 (batch, h, w) — TODO confirm;
        # expand to NHWC with a single channel for scat2d.
        net = tf.expand_dims(x_dict['spectrograms'], -1)
        net = scat2d(net, psi, layer_params)

        # Conv tower: two separable-style 7-taps, then 5x5s with growing stride.
        # Kept in this exact order so auto-generated variable names are unchanged.
        conv_specs = [((7, 1), 1), ((1, 7), 1), (5, 1), (5, 2), (5, 4), (5, 8)]
        for kernel, stride in conv_specs:
            net = tf.layers.conv2d(net, nfeat, kernel, stride,
                                   activation=tf.nn.relu)

        flat = tf.contrib.layers.flatten(net)
        hidden = tf.layers.dense(flat, 300)
        logits = tf.layers.dense(hidden, n_classes)
        # NOTE(review): squeeze(axis=1) only succeeds when that dim is 1,
        # i.e. presumably n_classes == 1 in practice — verify against callers.
        return tf.squeeze(logits, axis=1)
def scat2d_to_2d_2layer(x, reuse=tf.AUTO_REUSE, bs=batch_size):
    """Two-layer 2D scattering transform yielding a 2D feature map.

    Args:
        x: in (batch, h, w, 1) shape
        reuse: variable-scope reuse flag.
        bs: static batch size; needed because the reshapes below spell out
            the batch dimension explicitly.

    Returns (batch, h, w, channels)
    """
    with tf.variable_scope('scat2d_to_2d_2layer', reuse=reuse):
        # TF Estimator input is a dict, in case of multiple inputs
        first_psi = win.fst2d_psi_factory([7, 7], include_avg=False)
        first_params = layerO((1, 1), 'valid')
        # 107, 107
        U1 = scat2d(x, first_psi, first_params)

        second_psi = win.fst2d_psi_factory([7, 7], include_avg=False)
        second_params = layerO((1, 1), 'valid')

        # only procede with increasing frequency paths
        U2_parts = []
        for res_i, used_params in enumerate(first_psi.filter_params):
            increasing_psi = win.fst2d_psi_factory(second_psi.kernel_size,
                                                   used_params)
            if increasing_psi.nfilt > 0:
                # 101, 101
                U2_parts.append(scat2d(U1[:, :, :, res_i:(res_i + 1)],
                                       increasing_psi, second_params))
        U2 = tf.concat(U2_parts, 3)

        # Fold channels into the batch axis so the lo-pass runs per-channel:
        # (batch, h, w, chan) -> (batch, chan, h, w) -> (batch*chan, h, w, 1).
        U2 = tf.transpose(U2, [0, 3, 1, 2])
        U2os = U2.get_shape()
        U2 = tf.reshape(U2, (bs * U2os[1], U2os[2], U2os[3], 1))

        U1 = tf.transpose(U1, [0, 3, 1, 2])
        U1os = U1.get_shape()
        U1 = tf.reshape(U1, (bs * U1os[1], U1os[2], U1os[3], 1))

        # now lo-pass
        # each layer lo-passed differently so that (h,w) align bc we
        # want to be able to do 2d convolutions afterwards again
        phi_params = layerO((1, 1), 'valid')
        phi = win.fst2d_phi_factory([5, 5])

        # filter and separate by original batch via old shape
        S0 = scat2d(x[:, 6:-6, 6:-6, :], phi, phi_params)
        S0 = tf.reshape(S0, (bs, 1, S0.get_shape()[1], S0.get_shape()[2]))

        S1 = scat2d(U1[:, 3:-3, 3:-3, :], phi, phi_params)
        S1 = tf.reshape(S1,
                        (bs, U1os[1], S1.get_shape()[1], S1.get_shape()[2]))

        S2 = scat2d(U2, phi, phi_params)
        S2 = tf.reshape(S2,
                        (bs, U2os[1], S2.get_shape()[1], S2.get_shape()[2]))

        # (batch, chan, h,w)
        feat2d = tf.concat([S0, S1, S2], 1)
    return tf.transpose(feat2d, [0, 2, 3, 1])