Example #1

import sugartensor as tf

# SpeechCorpus, voca_size, and get_logit are assumed to come from this
# project's data and model modules; they are not part of sugartensor itself
from data import SpeechCorpus, voca_size
from model import get_logit
# set log level to debug
tf.sg_verbosity(10)

#
# hyper parameters
#

batch_size = 16  # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# MFCC features of the audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# per-sample sequence lengths, excluding zero-padded frames
seq_len = []
for input_ in inputs:
    seq_len.append(
        tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
    # the original snippet is truncated here; this body is a plausible
    # completion following the single-GPU CTC pattern shown in Example #4
    # encode audio feature
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
    # CTC loss
    return logit.sg_ctc(target=opt.target[opt.gpu_index],
                        seq_len=opt.seq_len[opt.gpu_index])
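
A tower like this is typically handed to sugartensor's tf.sg_train helper. A minimal sketch follows; the learning rate, epoch count, and the assumption that the corpus exposes num_batch are illustrative, not taken from the original snippet.

# minimal training sketch (hyperparameters are assumed, not from the snippet)
tf.sg_train(lr=0.0001,
            loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
            ep_size=data.num_batch, max_ep=50)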
Example #2

import sugartensor as tf

__author__ = '*****@*****.**'

# set log level to debug
tf.sg_verbosity(10)

# batch size
batch_size = 128

# MNIST input tensor (batch size is scaled by the number of GPUs)
data = tf.sg_data.Mnist(batch_size=batch_size * tf.sg_gpus())

# split inputs for each GPU tower
inputs = tf.split(data.train.image, tf.sg_gpus(), axis=0)
labels = tf.split(data.train.label, tf.sg_gpus(), axis=0)


# simple wrapping function with decorator for parallel training
@tf.sg_parallel
def get_loss(opt):

    # conv layers
    with tf.sg_context(name='convs', act='relu', bn=True):
        conv = (opt.input[opt.gpu_index]
                .sg_conv(dim=16, name='conv1')
                .sg_pool()
                .sg_conv(dim=32, name='conv2')
                .sg_pool()
                .sg_conv(dim=32, name='conv3')
                .sg_pool())

    # fc layers
    # the original snippet is truncated here; this is a plausible completion
    # that flattens the conv output and returns a cross-entropy loss over
    # the 10 MNIST classes
    with tf.sg_context(name='fcs', act='relu', bn=True):
        logit = (conv
                 .sg_flatten()
                 .sg_dense(dim=256, name='fc1')
                 .sg_dense(dim=10, act='linear', bn=False, name='fc2'))

    # cross entropy loss with logit
    return logit.sg_ce(target=opt.target[opt.gpu_index])
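
As in Example #1, the decorated loss can then be passed to sugartensor's training helper. The sketch below assumes tf.sg_train and that the MNIST wrapper exposes train.num_batch; the epoch count is illustrative.

# minimal training sketch (max_ep is an assumed value)
tf.sg_train(loss=get_loss(input=inputs, target=labels),
            ep_size=data.train.num_batch, max_ep=10)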
Example #4

import sugartensor as tf

# SpeechCorpus, voca_size, and get_logit are assumed to come from this
# project's data and model modules; they are not part of sugartensor itself
from data import SpeechCorpus, voca_size
from model import get_logit
# set log level to debug
tf.sg_verbosity(10)

#
# hyper parameters
#

batch_size = 1  # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# MFCC features of the audio
x = data.mfcc
# target sentence label
y = data.label

# per-sample sequence lengths, excluding zero-padded frames
seq_len = tf.not_equal(x.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)

# encode audio feature
logit = get_logit(x, voca_size=voca_size)

# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)

# beam-search decoding; tf.nn.ctc_beam_search_decoder expects time-major
# logits, hence the transpose. The original snippet is cut off mid-call,
# so merge_repeated=False is an assumed setting
decoded_sequence, _ = tf.nn.ctc_beam_search_decoder(
    logit.sg_transpose(perm=[1, 0, 2]), seq_len, merge_repeated=False)
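
Running the decoder requires a session with sugartensor's queue runners active. The sketch below assumes the tf.sg_init and tf.sg_queue_context helpers; the checkpoint directory name is purely illustrative.

# minimal inference sketch (checkpoint path is an assumed example)
with tf.Session() as sess:
    tf.sg_init(sess)
    # restore trained weights from a checkpoint
    saver = tf.train.Saver()
    saver.restore(sess, tf.train.latest_checkpoint('asset/train'))
    # run with queue runners feeding the corpus tensors
    with tf.sg_queue_context():
        decoded = sess.run(decoded_sequence)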