import sugartensor as tf

__author__ = '*****@*****.**'


# set log level to debug
tf.sg_verbosity(10)

# batch size
batch_size = 128

# MNIST input tensor ( batch size should be adjusted for multiple GPUs )
data = tf.sg_data.Mnist(batch_size=batch_size * tf.sg_gpus())

# split inputs for each GPU tower
inputs = tf.split(data.train.image, tf.sg_gpus(), axis=0)
labels = tf.split(data.train.label, tf.sg_gpus(), axis=0)


# simple wrapping function with decorator for parallel training
@tf.sg_parallel
def get_loss(opt):
    # conv layers
    with tf.sg_context(name='convs', act='relu', bn=True):
        conv = (opt.input[opt.gpu_index]
                .sg_conv(dim=16, name='conv1')
                .sg_pool()
                .sg_conv(dim=32, name='conv2')
                .sg_pool()
                .sg_conv(dim=32, name='conv3')
                .sg_pool())

    # fc layers
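    # The excerpt above stops at the fc layers. What follows is a minimal
    # sketch of a plausible completion, assuming sugartensor's sg_flatten /
    # sg_dense / sg_ce chain API and the tf.sg_train helper; the layer sizes
    # are illustrative, not taken from the original source.
    with tf.sg_context(name='fcs', act='relu', bn=True):
        logit = (conv
                 .sg_flatten()
                 .sg_dense(dim=256, name='fc1')
                 .sg_dense(dim=10, act='linear', bn=False, name='fc2'))

    # cross-entropy loss for this tower's shard of the batch
    return logit.sg_ce(target=opt.target[opt.gpu_index])


# parallel training ( same call signature as single-GPU training )
tf.sg_train(loss=get_loss(input=inputs, target=labels),
            ep_size=data.train.num_batch, log_interval=10)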
# set log level to debug
tf.sg_verbosity(10)

#
# hyper parameters
#

batch_size = 16    # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
inputs = tf.split(data.mfcc, tf.sg_gpus(), axis=0)
# target sentence label
labels = tf.split(data.label, tf.sg_gpus(), axis=0)

# sequence length except zero-padding
seq_len = []
for input_ in inputs:
    seq_len.append(
        tf.not_equal(input_.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1))


# parallel loss tower
@tf.sg_parallel
def get_loss(opt):
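    # The original excerpt stops at the function header. A minimal sketch of
    # the tower body, assuming a get_logit() encoder and a voca_size constant
    # defined elsewhere in the file ( both appear in the single-GPU variant
    # below ) and sugartensor's sg_ctc loss:
    # encode audio feature for this tower's shard
    logit = get_logit(opt.input[opt.gpu_index], voca_size=voca_size)
    # CTC loss against this tower's labels and sequence lengths
    return logit.sg_ctc(target=opt.target[opt.gpu_index],
                        seq_len=opt.seq_len[opt.gpu_index])


# train across all towers ( hypothetical invocation, for illustration )
tf.sg_train(loss=get_loss(input=inputs, target=labels, seq_len=seq_len),
            ep_size=data.num_batch)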
# set log level to debug
tf.sg_verbosity(10)

#
# hyper parameters
#

batch_size = 1     # total batch size

#
# inputs
#

# corpus input tensor
data = SpeechCorpus(batch_size=batch_size * tf.sg_gpus())

# mfcc feature of audio
x = data.mfcc
# target sentence label
y = data.label

# sequence length except zero-padding
seq_len = tf.not_equal(x.sg_sum(axis=2), 0.).sg_int().sg_sum(axis=1)

# encode audio feature
logit = get_logit(x, voca_size=voca_size)

# CTC loss
loss = logit.sg_ctc(target=y, seq_len=seq_len)

# beam search decoding; the call was truncated in the original, so the
# arguments below are a plausible completion ( the CTC decoder expects
# time-major input, hence the transpose of the batch-major logit )
decoded_sequence, _ = tf.nn.ctc_beam_search_decoder(
    logit.sg_transpose(perm=[1, 0, 2]), seq_len, merge_repeated=False)
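# A hedged sketch of how the decoded result might be consumed; the session
# setup and the data.print_index() helper are assumptions, not part of the
# excerpt above. tf.nn.ctc_beam_search_decoder returns sparse tensors, so the
# best path is densified before printing.
dense = tf.sparse_to_dense(decoded_sequence[0].indices,
                           decoded_sequence[0].dense_shape,
                           decoded_sequence[0].values)

with tf.Session() as sess:
    tf.sg_init(sess)
    # queue context so the corpus input queues are running
    with tf.sg_queue_context():
        # decode one batch and print the label indices as text
        data.print_index(sess.run(dense))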