Example #1
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the CNN variables and operations to the graph
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #input layer
            conv = Conv2dLayer(self.num_units, 3, 1)

            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            time_steps = [inputs]
            num_time_steps = 11

            print(inputs[1])

            for i in range(num_time_steps):
                forward = tf.pad(inputs[:, i + 1:, :],
                                 [[0, 0], [0, i + 1], [0, 0]])
                backward = tf.pad(inputs[:, :-i - 1, :],
                                  [[0, 0], [i + 1, 0], [0, 0]])
                time_steps += [forward, backward]
            logits = tf.stack(time_steps, axis=3)

            #apply the input layer
            #logits = tf.expand_dims(inputs, 3)
            for l in range(1, self.num_layers):
                logits = conv(logits, seq_length, is_training,
                              'convlayer' + str(l))
                logits = tf.nn.relu(logits)

            #stack all the output channels for the final layer
            logits = tf.reshape(logits,
                                logits.get_shape().as_list()[0:2] + [-1])

            #convert the logits to nonsequence logits for the output layer
            logits = seq_convertors.seq2nonseq(logits, seq_length)

            logits = outlayer(logits, seq_length, is_training, 'outlayer')

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   int(inputs.get_shape()[0]))

            #create a saver
            saver = tf.train.Saver()

            control_ops = None

        return seq_logits, seq_length, saver, control_ops
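
The padding loop above splices time-shifted copies of the input so the 2-D convolution sees past and future frames as extra channels. A minimal NumPy sketch of the same shift-and-pad idea, with made-up shapes and a smaller context window:

import numpy as np

batch_size, max_length, feature_dim = 2, 6, 3
inputs = np.random.randn(batch_size, max_length, feature_dim)

num_time_steps = 2  # reduced from 11 for readability
time_steps = [inputs]
for i in range(num_time_steps):
    # drop the first i+1 frames and zero-pad at the end (future context)
    forward = np.pad(inputs[:, i + 1:, :], [(0, 0), (0, i + 1), (0, 0)],
                     mode='constant')
    # drop the last i+1 frames and zero-pad at the start (past context)
    backward = np.pad(inputs[:, :-i - 1, :], [(0, 0), (i + 1, 0), (0, 0)],
                      mode='constant')
    time_steps += [forward, backward]

stacked = np.stack(time_steps, axis=3)
print(stacked.shape)  # (2, 6, 3, 2 * num_time_steps + 1)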
Example #2
    def __call__(self,
                 inputs,
                 input_seq_length,
                 targets=None,
                 target_seq_length=None,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the neural net variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a
                [batch_size x max_input_length x feature_dim] tensor
            input_seq_length: The sequence lengths of the input utterances, this
                is a [batch_size] dimensional vector
            targets: the targets to the neural network, this is a
                [batch_size x max_output_length x 1] tensor. The targets can be
                used during training
            target_seq_length: The sequence lengths of the target utterances,
                this is a [batch_size] dimensional vector
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations (may be empty)
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #create the input layer
            inlayer = Conv1dlayer(self.num_units, self.kernel_size, 1)

            #create the gated convolutional layers
            dconv = GatedDilatedConvolution(self.kernel_size)

            #create the fully connected layer
            act = activation.TfActivation(None, tf.nn.relu)
            fflayer = FFLayer(self.num_units, act)

            #create the output layer
            act = activation.TfActivation(None, lambda x: x)
            outlayer = FFLayer(self.output_dim, act)

            #apply the input layer
            logits = 0
            forward = inlayer(inputs, is_training, reuse, 'inlayer')

            #apply the blocks of dilated convolution layers
            for b in range(self.num_blocks):
                for l in range(self.num_layers):
                    forward, highway = dconv(forward, 2**l, is_training, reuse,
                                             'dconv%d-%d' % (b, l))
                    logits += highway

            #go to nonsequential data
            logits = seq_convertors.seq2nonseq(logits, input_seq_length)

            #apply the relu
            logits = tf.nn.relu(logits)

            #apply the fully connected layer
            logits = fflayer(logits, is_training, reuse, scope='FFlayer')

            #apply the output layer
            logits = outlayer(logits, is_training, reuse, scope='outlayer')

            #go back to sequential data
            logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                               int(inputs.get_shape()[1]))

            #create a saver
            saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
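
The dilation rate doubles at every layer (2**l) and the pattern repeats for every block, so the receptive field grows exponentially with depth. A small sketch of that arithmetic, using the standard formula for stacked dilated convolutions (the concrete numbers depend on this model's configuration):

def receptive_field(kernel_size, num_blocks, num_layers):
    '''receptive field (in frames) of num_blocks stacks of dilated
    convolutions with dilation rates 1, 2, 4, ..., 2**(num_layers - 1)'''
    dilations = [2 ** l for l in range(num_layers)] * num_blocks
    return (kernel_size - 1) * sum(dilations) + 1

# e.g. kernel_size=3 with 3 blocks of 5 layers sees 187 frames of context
print(receptive_field(3, 3, 5))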
Example #3
    def decode_data(self, writer):

        self.retrieved_data()

        ##########################
        ### GRAPH DEFINITION
        ##########################

        g = tf.Graph()
        with g.as_default():

            decode_inputs = tf.placeholder(
                tf.float32,
                shape=[self.max_length, self.input_dim],
                name='decode_inputs')

            decode_seq_length = tf.placeholder(tf.int32,
                                               shape=[1],
                                               name='decode_seq_length')

            split_inputs = tf.unstack(tf.expand_dims(decode_inputs, 1),
                                      name="decode_split_inputs_op")

            nonseq_inputs = seq_convertors.seq2nonseq(split_inputs,
                                                      decode_seq_length)

            # Multilayer perceptron
            layer_1 = tf.add(tf.matmul(nonseq_inputs, self.weights_h1),
                             self.bias_b1)
            layer_1 = tf.nn.tanh(layer_1)

            layer_2 = tf.add(tf.matmul(layer_1, self.weights_h2), self.bias_b2)
            layer_2 = tf.nn.tanh(layer_2)

            logits = tf.add(tf.matmul(layer_2, self.weights_out),
                            self.bias_out,
                            name="logits_op")

            seq_logits = seq_convertors.nonseq2seq(logits, decode_seq_length,
                                                   len(split_inputs))

            decode_logits = seq_convertors.seq2nonseq(seq_logits,
                                                      decode_seq_length)

            outputs = tf.nn.softmax(decode_logits, name="final_operation")

        ##########################
        ###      EVALUATION
        ##########################

        config = tf.ConfigProto()
        #config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(graph=g, config=config) as sess:

            #with tf.Session(graph=g) as sess:

            sess.run(tf.global_variables_initializer())

            for i in range(self.total_uttarences):

                utt_id = self.utt_id_list[i]

                utt_mat = self.utt_dict[utt_id]

                input_seq_length = [utt_mat.shape[0]]
                #pad the inputs
                utt_mat = np.append(
                    utt_mat,
                    np.zeros(
                        [self.max_length - utt_mat.shape[0],
                         utt_mat.shape[1]]), 0)

                outputs_value = sess.run('final_operation:0',
                                         feed_dict={
                                             'decode_inputs:0':
                                             utt_mat,
                                             'decode_seq_length:0':
                                             input_seq_length
                                         })

                # print (outputs_value.shape)
                # print (type(outputs_value))

                #get state likelihoods by dividing by the prior
                output = outputs_value / self.prior

                #floor the values to avoid problems with log
                output = np.where(output == 0, np.finfo(float).eps, output)

                # print (output.shape)
                # print (type(output))

                #write the pseudo-likelihoods in kaldi feature format
                writer.write_next_utt(utt_id, np.log(output))

        #close the writer
        writer.close()
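
The post-processing at the end (dividing the softmax posteriors by the state prior, flooring zeros, and taking the log) is the usual hybrid DNN/HMM pseudo-likelihood computation. A self-contained NumPy sketch of just that step, with made-up posteriors and priors:

import numpy as np

# hypothetical softmax outputs for 4 frames over 3 states, and a state prior
posteriors = np.array([[0.7, 0.2, 0.1],
                       [0.0, 0.5, 0.5],
                       [0.1, 0.8, 0.1],
                       [0.3, 0.3, 0.4]])
prior = np.array([0.5, 0.3, 0.2])

# the scaled likelihood p(x|s) is proportional to p(s|x) / p(s)
output = posteriors / prior

# floor exact zeros so the log below stays finite
output = np.where(output == 0, np.finfo(float).eps, output)

log_likelihoods = np.log(output)  # written out in Kaldi feature format
print(log_likelihoods.shape)  # (4, 3)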
Example #4
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the DNN variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #input layer
            layer = FFLayer(self.num_units, self.activation)

            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            #do the forward computation

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            activations = [None] * self.num_layers
            activations[0] = layer(nonseq_inputs, is_training, reuse, 'layer0')
            for l in range(1, self.num_layers):
                activations[l] = layer(activations[l - 1], is_training, reuse,
                                       'layer' + str(l))

            if self.layerwise_init:

                #variable that determines how many layers are initialised
                #in the neural net
                initialisedlayers = tf.get_variable(
                    'initialisedlayers', [],
                    initializer=tf.constant_initializer(0),
                    trainable=False,
                    dtype=tf.int32)

                #operation to increment the number of layers
                add_layer_op = initialisedlayers.assign(initialisedlayers +
                                                        1).op

                #compute the logits by selecting the activations at the layer
                #that has last been added to the network, this is used for layer
                #by layer initialisation
                logits = tf.case([(tf.equal(initialisedlayers, tf.constant(l)),
                                   Callable(activations[l]))
                                  for l in range(len(activations))],
                                 default=Callable(activations[-1]),
                                 exclusive=True,
                                 name='layerSelector')

                logits.set_shape([None, self.num_units])
            else:
                logits = activations[-1]

            logits = outlayer(logits, is_training, reuse,
                              'layer' + str(self.num_layers))

            if self.layerwise_init:
                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(tf.GraphKeys.VARIABLES,
                                      scope=(tf.get_variable_scope().name +
                                             '/layer' + str(self.num_layers))))

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
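
tf.case expects each branch to be a callable that returns a tensor, so the Callable objects wrapping activations[l] are presumably a thin wrapper along these lines (an assumption; the real helper in this repository may differ):

class Callable(object):
    '''wraps a tensor in a callable so it can be used as a tf.case branch'''

    def __init__(self, tensor):
        self.tensor = tensor

    def __call__(self):
        return self.tensor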
Example #5
    def __call__(self,
                 inputs,
                 input_seq_length,
                 targets=None,
                 target_seq_length=None,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the neural net variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a
                [batch_size x max_input_length x feature_dim] tensor
            input_seq_length: The sequence lengths of the input utterances, this
                is a [batch_size] dimensional vector
            targets: the targets to the neural network, this is a
                [batch_size x max_output_length x 1] tensor. The targets can be
                used during training
            target_seq_length: The sequence lengths of the target utterances,
                this is a [batch_size] dimensional vector
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations (may be empty)
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            #the blstm layer
            blstm = BLSTMLayer(self.num_units)

            #the linear output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            #do the forward computation

            #add gaussian noise to the inputs
            if is_training:
                logits = inputs + tf.random_normal(inputs.get_shape(),
                                                   stddev=0.6)
            else:
                logits = inputs

            for layer in range(self.num_layers):
                logits = blstm(logits, input_seq_length, is_training, reuse,
                               'layer' + str(layer))

                logits = self.activation(logits, is_training, reuse)

            logits = seq_convertors.seq2nonseq(logits, input_seq_length)

            logits = outlayer(logits, is_training, reuse, 'outlayer')

            logits = seq_convertors.nonseq2seq(logits, input_seq_length,
                                               int(inputs.get_shape()[1]))

            #create a saver
            saver = tf.train.Saver()

        return logits, input_seq_length, saver, None
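
Adding Gaussian noise to the inputs only in training mode is a simple regulariser. A minimal standalone sketch of the same pattern (the stddev of 0.6 is taken from the code above; the function name is hypothetical):

import tensorflow as tf

def add_input_noise(inputs, is_training, stddev=0.6):
    '''corrupt the inputs with zero-mean Gaussian noise during training only'''
    if is_training:
        return inputs + tf.random_normal(tf.shape(inputs), stddev=stddev)
    return inputs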
Example #6
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the LSTM variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):

            weights = {
                'out':
                tf.get_variable(
                    'weights_out', [self.num_units, self.output_dim],
                    initializer=tf.contrib.layers.xavier_initializer())
            }

            biases = {
                'out':
                tf.get_variable('biases_out', [self.output_dim],
                                initializer=tf.constant_initializer(0))
            }

            #convert the sequential data to non sequential data
            nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            input_dim = nonseq_inputs.shape[1]
            nonseq_inputs = tf.reshape(nonseq_inputs, [-1, 11, 40])

            n_steps = 11
            nonseq_inputs = tf.transpose(nonseq_inputs, [1, 0, 2])

            keep_prob = 1
            # define the lstm cell
            # use dropout in training mode, otherwise keep all activations
            if is_training and keep_prob < 1:
                lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    self.num_units,
                    forget_bias=0.0,
                    input_size=None,
                    activation=tf.nn.relu,
                    layer_norm=False,
                    norm_gain=1.0,
                    norm_shift=0.0,
                    dropout_keep_prob=keep_prob,
                    dropout_prob_seed=None)
            else:
                lstm_cell = tf.contrib.rnn.LayerNormBasicLSTMCell(
                    self.num_units,
                    forget_bias=0.0,
                    input_size=None,
                    activation=tf.nn.relu,
                    layer_norm=False,
                    norm_gain=1.0,
                    norm_shift=0.0,
                    dropout_keep_prob=1,
                    dropout_prob_seed=None)

            # stack the lstm to form multi-layers
            cell = tf.contrib.rnn.MultiRNNCell([lstm_cell] * self.num_layers,
                                               state_is_tuple=True)

            # print(int(nonseq_inputs.shape[0]))
            # self._initial_state = cell.zero_state(int(nonseq_inputs.shape[0]), tf.float32)

            # apply the dropout for the inputs to the first hidden layer
            if is_training and keep_prob < 1:
                nonseq_inputs = tf.nn.dropout(nonseq_inputs, keep_prob)

            final_nonseq_inputs = tf.unstack(nonseq_inputs,
                                             num=n_steps,
                                             axis=0)

            # Get lstm cell output initial_state=self._initial_state,
            outputs, states = tf.contrib.rnn.static_rnn(cell,
                                                        final_nonseq_inputs,
                                                        dtype=tf.float32)
            outputs = outputs[-1]

            # Linear activation, using rnn inner loop last output
            logits = tf.matmul(outputs, weights['out']) + biases['out']

            # # if self.layerwise_init:

            # # #variable that determines how many layers are initialised
            # # #in the neural net
            # # initialisedlayers = tf.get_variable(
            # # 'initialisedlayers', [],
            # # initializer=tf.constant_initializer(0),
            # # trainable=False,
            # # dtype=tf.int32)

            # # #operation to increment the number of layers
            # # add_layer_op = initialisedlayers.assign(initialisedlayers+1).op

            # # #compute the logits by selecting the activations at the layer
            # # #that has last been added to the network, this is used for layer
            # # #by layer initialisation
            # # logits = tf.case(
            # # [(tf.equal(initialisedlayers, tf.constant(l)),
            # # Callable(activations[l]))
            # # for l in range(len(activations))],
            # # default=Callable(activations[-1]),
            # # exclusive=True, name='layerSelector')

            # # logits.set_shape([None, self.num_units])

            if self.layerwise_init:
                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(tf.GraphKeys.VARIABLES,
                                      scope=(tf.get_variable_scope().name +
                                             '/layer' + str(self.num_layers))))

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
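
The reshape and transpose above assume every non-sequential row is a splice of 11 consecutive 40-dimensional frames; they regroup those rows into a time-major tensor and then a per-step list for static_rnn. A NumPy sketch of the same reshuffling (the 11 and 40 come from the code, the number of rows is made up):

import numpy as np

n_steps, frame_dim = 11, 40
num_rows = 5  # number of non-sequential rows after seq2nonseq (hypothetical)

# each row is 11 stacked frames of 40 features, i.e. 440-dimensional
nonseq_inputs = np.random.randn(num_rows, n_steps * frame_dim)

# regroup into [rows, n_steps, frame_dim], then go time-major for the RNN
frames = nonseq_inputs.reshape(-1, n_steps, frame_dim)
time_major = frames.transpose(1, 0, 2)
print(time_major.shape)  # (11, 5, 40)

# static_rnn consumes a Python list with one [rows, frame_dim] array per step
step_list = [time_major[t] for t in range(n_steps)]
print(len(step_list), step_list[0].shape)  # 11 (5, 40)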
Example #7
    def __call__(self,
                 inputs,
                 seq_length,
                 is_training=False,
                 reuse=False,
                 scope=None):
        '''
        Add the DNN variables and operations to the graph

        Args:
            inputs: the inputs to the neural network, this is a list containing
                a [batch_size, input_dim] tensor for each time step
            seq_length: The sequence lengths of the input utterances, if None
                the maximal sequence length will be taken
            is_training: whether or not the network is in training mode
            reuse: whether or not the variables in the network should be reused
            scope: the name scope

        Returns:
            A quadruple containing:
                - output logits
                - the output logits sequence lengths as a vector
                - a saver object
                - a dictionary of control operations:
                    -add: add a layer to the network
                    -init: initialise the final layer
        '''

        with tf.variable_scope(scope or type(self).__name__, reuse=reuse):
            #input layer
            layer = FFLayer(self.num_units, self.activation)
            #output layer
            outlayer = FFLayer(self.output_dim,
                               TfActivation(None, lambda x: x), 0)

            #convert the sequential data to non sequential data
            ## to use the pure DNN, uncomment this line
            #nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

            activations = [None] * self.num_layers

            # Define the first hidden layers
            # # the conv layer
            #cnn_layer = RestNet()
            #cnn_layer = CnnVd6()
            if self.cnn_type == 1:
                print('------The Cnn Config------')
                #convert the sequential data to non sequential data
                nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)

                cnn_layer = CnnLayer(self.cnn_conf)
                activations[0] = cnn_layer(nonseq_inputs, is_training, reuse,
                                           'layer0')
            else:
                print("Not using CNN")
            # # the lstm layer, type 1
            if self.lstm_type == 1:
                print('------The LSTM Config------')
                #convert the sequential data to non sequential data
                # inputs is a time-step list (e.g. length 777) of 2-D tensors
                # of shape [batch_size (e.g. 64) x feature_dim]
                # nonseq_inputs is a 2-D tensor of shape
                # [(batch_size * time) x feature_dim]
                nonseq_inputs = seq_convertors.seq2nonseq(inputs, seq_length)
                print('Type1: the LSTM data is processed like the DNN, using '
                      'stacked frames; no output state is reused')

                lstm_layer = LSTMLayer(self.lstm_conf)
                activations[0] = lstm_layer(nonseq_inputs, is_training, reuse,
                                            'layer0')
            ## the lstm layer, type 2
            elif self.lstm_type == 2:
                print('------The LSTM Config------')
                print('Type2: the LSTM data is processed fully sequentially')

                # here we directly use the seq data, that's para: inputs
                lstm_layer = LSTMLayer2(self.lstm_conf2)
                # the dynamic lstm's output has the format: time x batch_size x feature_dim
                seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                        'layer0')

                # to connect to the DNN, convert the sequential output to
                # non-sequential data so it can be used directly by the DNN
                activations[0] = seq_convertors.seq2nonseq(
                    seq_output, seq_length)

            ## the lstm layer, type 3
            elif self.lstm_type == 3:
                print('------The LSTM Config------')
                print('Type3: the LSTM data is processed in sub-sequences')

                # here we directly use the seq data, that's para: inputs
                lstm_layer = LSTMLayer3(self.lstm_conf3, self.max_input_length)
                # the dynamic lstm's output has the format: time x batch_size x feature_dim
                seq_output = lstm_layer(inputs, seq_length, is_training, reuse,
                                        'layer0')
                # to connect to the DNN, convert the sequential output to
                # non-sequential data so it can be used directly by the DNN

                # Note:
                # the first index of seq_output should correspond to the
                # sequence length, i.e. shape [seq_length, batch_size, output_dim]
                activations[0] = seq_convertors.seq2nonseq(
                    seq_output, seq_length)
            else:
                print("Not using LSTM")

            # define the FL hidden layers
            print('------The DNN Config------')
            print("use %d FL hidden layer" % (self.FL_num_layers))
            for l in range(1, self.num_layers):
                print("the " + str(l) + " layer's input is: " +
                      str(activations[l - 1].shape))
                activations[l] = layer(activations[l - 1], is_training, reuse,
                                       'layer' + str(l))

            if self.layerwise_init:
                #variable that determines how many layers are initialised
                #in the neural net
                initialisedlayers = tf.get_variable(
                    'initialisedlayers', [],
                    initializer=tf.constant_initializer(0),
                    trainable=False,
                    dtype=tf.int32)

                #operation to increment the number of layers
                add_layer_op = initialisedlayers.assign(initialisedlayers +
                                                        1).op

                #compute the logits by selecting the activations at the layer
                #that has last been added to the network, this is used for layer
                #by layer initialisation
                logits = tf.case([(tf.equal(initialisedlayers, tf.constant(l)),
                                   Callable(activations[l]))
                                  for l in range(len(activations))],
                                 default=Callable(activations[-1]),
                                 exclusive=True,
                                 name='layerSelector')

                logits.set_shape([None, self.num_units])
            else:
                logits = activations[-1]

            logits = outlayer(logits, is_training, reuse,
                              'layer' + str(self.num_layers))

            if self.layerwise_init:
                #operation to initialise the final layer
                init_last_layer_op = tf.initialize_variables(
                    tf.get_collection(
                        tf.GraphKeys.VARIABLES,
                        scope=(tf.get_variable_scope().name + '/layer' +
                               str(self.FL_num_layers))))

                control_ops = {'add': add_layer_op, 'init': init_last_layer_op}
            else:
                control_ops = None

            #convert the logits to sequence logits to match expected output
            seq_logits = seq_convertors.nonseq2seq(logits, seq_length,
                                                   len(inputs))

            #create a saver
            saver = tf.train.Saver()

        return seq_logits, seq_length, saver, control_ops
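
All of these examples rely on seq_convertors.seq2nonseq and nonseq2seq to move between padded sequential batches and a flat frame matrix. A rough NumPy sketch of what the conversion does conceptually (the repository's TensorFlow implementation operates on tensors or per-step lists and will differ in detail):

import numpy as np

def seq2nonseq(sequences, seq_lengths):
    '''concatenate only the valid frames of each padded
    [batch x max_length x dim] sequence'''
    return np.concatenate(
        [sequences[b, :seq_lengths[b]] for b in range(len(seq_lengths))],
        axis=0)

def nonseq2seq(frames, seq_lengths, max_length):
    '''split a flat frame matrix back into zero-padded sequences'''
    sequences = np.zeros([len(seq_lengths), max_length, frames.shape[1]])
    start = 0
    for b, length in enumerate(seq_lengths):
        sequences[b, :length] = frames[start:start + length]
        start += length
    return sequences

batch = np.random.randn(2, 5, 3)         # 2 utterances padded to 5 frames
lengths = [5, 3]
flat = seq2nonseq(batch, lengths)        # (8, 3): only the real frames remain
restored = nonseq2seq(flat, lengths, 5)  # (2, 5, 3) with the zero padding back
print(flat.shape, restored.shape)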