def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', dropout_rate=0.0):
    """ This function initialises a neural network

    :param n_in: Dimensionality of input features
    :type n_in: Integer
    :param hidden_layer_size: The layer size for each hidden layer
    :type hidden_layer_size: A list of integers
    :param n_out: Dimensionality of output features
    :type n_out: Integer
    :param hidden_layer_type: the activation type of each hidden layer, e.g., TANH, LSTM, GRU, BLSTM
    :param L1_reg: the L1 regularisation weight
    :param L2_reg: the L2 regularisation weight
    :param output_type: the activation type of the output layer; by default 'LINEAR', i.e. linear regression.
    :param dropout_rate: probability of dropout, a float number between 0 and 1.
    """

    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)

    self.n_layers = len(hidden_layer_size)

    self.dropout_rate = dropout_rate
    self.is_train = T.iscalar('is_train')

    assert len(hidden_layer_size) == len(hidden_layer_type)

    self.x = T.matrix('x')
    self.y = T.matrix('y')

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.rnn_layers = []
    self.params = []
    self.delta_params = []

    rng = np.random.RandomState(123)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_size[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.rnn_layers[i - 1].output
            # bidirectional layers output twice as many features
            if hidden_layer_type[i - 1] == 'BSLSTM' or hidden_layer_type[i - 1] == 'BLSTM':
                input_size = hidden_layer_size[i - 1] * 2

        if hidden_layer_type[i] == 'SLSTM':
            hidden_layer = SimplifiedLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                          p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SGRU':
            hidden_layer = SimplifiedGRU(rng, layer_input, input_size, hidden_layer_size[i],
                                         p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'GRU':
            hidden_layer = GatedRecurrentUnit(rng, layer_input, input_size, hidden_layer_size[i],
                                              p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NFG':
            hidden_layer = LstmNFG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NOG':
            hidden_layer = LstmNOG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NIG':
            hidden_layer = LstmNIG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NPH':
            hidden_layer = LstmNoPeepholes(rng, layer_input, input_size, hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM':
            hidden_layer = VanillaLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'BSLSTM':
            hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                            hidden_layer_size[i],
                                            p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'BLSTM':
            hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                           hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'RNN':
            hidden_layer = VanillaRNN(rng, layer_input, input_size, hidden_layer_size[i],
                                      p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'TANH':
            hidden_layer = SigmoidLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=T.tanh,
                                        p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SIGMOID':
            hidden_layer = SigmoidLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=T.nnet.sigmoid,
                                        p=self.dropout_rate, training=self.is_train)
        else:
            logger.critical(
                "This hidden layer type: %s is not supported right now! \n"
                "Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
            sys.exit(1)

        self.rnn_layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)

    input_size = hidden_layer_size[-1]
    if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM':
        input_size = hidden_layer_size[-1] * 2

    if output_type == 'LINEAR':
        self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out)
    # elif output_type == 'BSLSTM':
    #     self.final_layer = BidirectionLSTM(rng, self.rnn_layers[-1].output, input_size, hidden_layer_size[-1], self.n_out)
    else:
        logger.critical(
            "This output layer type: %s is not supported right now! \n"
            "Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
        sys.exit(1)

    self.params.extend(self.final_layer.params)

    self.updates = {}
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX),
            name='updates')

    # mean over frames of the per-frame summed squared error
    self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))
    self.errors = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))
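# A minimal usage sketch (not part of the original file). It assumes this
# __init__ belongs to a DeepRecurrentNetwork-style class and that the layer
# classes used above are importable; all feature dimensions are made-up values.
#
#     net = DeepRecurrentNetwork(n_in=425, hidden_layer_size=[512, 512, 256],
#                                n_out=187, L1_reg=0.0, L2_reg=1e-5,
#                                hidden_layer_type=['TANH', 'TANH', 'LSTM'],
#                                output_type='LINEAR', dropout_rate=0.0)
#     # net.finetune_cost and net.errors are symbolic expressions; compile them
#     # with theano.function (supplying net.x, net.y, net.is_train) to evaluate.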
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, n_outs=10,
             l1_reg=None, l2_reg=None, hidden_layers_sizes=[500, 500],
             hidden_activation='tanh', output_activation='linear',
             projection_insize=100, projection_outsize=10,
             first_layer_split=True, expand_by_minibatch=False,
             initial_projection_distrib='gaussian',
             use_rprop=0, rprop_init_update=0.001):

    ## beginning at label index 1, 5 blocks of 49 inputs each to be projected to 10 dim.

    logger = logging.getLogger("TP-DNN initialization")

    self.projection_insize = projection_insize
    self.projection_outsize = projection_outsize

    self.sigmoid_layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = len(hidden_layers_sizes)

    self.output_activation = output_activation

    self.use_rprop = use_rprop
    self.rprop_init_update = rprop_init_update

    self.l1_reg = l1_reg
    self.l2_reg = l2_reg

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.numpy_rng = numpy_rng

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    if expand_by_minibatch:
        self.x_proj = T.ivector('x_proj')
    else:
        self.x_proj = T.matrix('x_proj')
    self.y = T.matrix('y')

    if expand_by_minibatch:
        # expand token indices to one-hot rows inside the graph
        z = theano.tensor.zeros((self.x_proj.shape[0], self.projection_insize))
        indexes = self.x_proj
        one_hot = theano.tensor.set_subtensor(z[theano.tensor.arange(self.x_proj.shape[0]), indexes], 1)
        projection_input = one_hot
    else:
        projection_input = self.x_proj

    ## Make projection layer
    self.projection_layer = TokenProjectionLayer(rng=numpy_rng,
                                                 input=projection_input,
                                                 projection_insize=self.projection_insize,
                                                 projection_outsize=self.projection_outsize,
                                                 initial_projection_distrib=initial_projection_distrib)

    self.params.extend(self.projection_layer.params)
    self.delta_params.extend(self.projection_layer.delta_params)

    first_layer_input = T.concatenate([self.x, self.projection_layer.output], axis=1)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins + self.projection_outsize
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = first_layer_input
        else:
            layer_input = self.sigmoid_layers[-1].output

        if i == 0 and first_layer_split:
            sigmoid_layer = SplitHiddenLayer(rng=numpy_rng,
                                             input=layer_input,
                                             n_in1=n_ins, n_in2=self.projection_outsize,
                                             n_out=hidden_layers_sizes[i],
                                             activation=T.tanh)  ## T.nnet.sigmoid
        else:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)  ## T.nnet.sigmoid
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # add final layer
    if self.output_activation == 'linear':
        self.final_layer = LinearLayer(rng=numpy_rng,
                                       input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    elif self.output_activation == 'sigmoid':
        self.final_layer = SigmoidLayer(rng=numpy_rng,
                                        input=self.sigmoid_layers[-1].output,
                                        n_in=hidden_layers_sizes[-1],
                                        n_out=n_outs,
                                        activation=T.nnet.sigmoid)
    else:
        logger.critical("This output activation function: %s is not supported right now!"
                        % (self.output_activation))
        sys.exit(1)

    self.params.extend(self.final_layer.params)
    self.delta_params.extend(self.final_layer.delta_params)

    ## params for 2 hidden layers, projection, first split layer, will look like this:
    ## [W_proj; W_1a, W_1b, b_1; W_2, b_2; W_o, b_o]

    ### MSE
    self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y)
                                      * (self.final_layer.output - self.y), axis=1))
    self.errors = T.mean(T.sum((self.final_layer.output - self.y)
                               * (self.final_layer.output - self.y), axis=1))
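# A standalone sketch (not part of the original file) of the one-hot expansion
# used above when expand_by_minibatch is True: integer token indices are turned
# into one-hot rows symbolically with set_subtensor. The size 5 stands in for
# projection_insize.
#
#     import numpy as np
#     import theano
#     import theano.tensor as T
#
#     idx = T.ivector('idx')
#     z = T.zeros((idx.shape[0], 5))
#     one_hot = T.set_subtensor(z[T.arange(idx.shape[0]), idx], 1)
#     f = theano.function([idx], one_hot)
#     print(f(np.asarray([0, 3, 1], dtype='int32')))   # rows of a 3x5 one-hot matrix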
class DeepRecurrentNetwork(object):
    """
    This class assembles various neural network architectures, from basic
    feedforward networks to bidirectional gated recurrent networks and hybrid
    architectures. **Hybrid** means a combination of feedforward and recurrent
    layers.
    """

    def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
                 hidden_layer_type, output_type='LINEAR', dropout_rate=0.0,
                 optimizer='sgd', loss_function='MMSE', rnn_batch_training=False):
        """ This function initialises a neural network

        :param n_in: Dimensionality of input features
        :type n_in: Integer
        :param hidden_layer_size: The layer size for each hidden layer
        :type hidden_layer_size: A list of integers
        :param n_out: Dimensionality of output features
        :type n_out: Integer
        :param hidden_layer_type: the activation type of each hidden layer, e.g., TANH, LSTM, GRU, BLSTM
        :param L1_reg: the L1 regularisation weight
        :param L2_reg: the L2 regularisation weight
        :param output_type: the activation type of the output layer; by default 'LINEAR', i.e. linear regression.
        :param dropout_rate: probability of dropout, a float number between 0 and 1.
        """

        logger = logging.getLogger("DNN initialization")

        self.n_in = int(n_in)
        self.n_out = int(n_out)

        self.n_layers = len(hidden_layer_size)

        self.dropout_rate = dropout_rate
        self.optimizer = optimizer
        self.loss_function = loss_function
        self.is_train = T.iscalar('is_train')
        self.rnn_batch_training = rnn_batch_training

        assert len(hidden_layer_size) == len(hidden_layer_type)

        self.list_of_activations = ['TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU']

        if self.rnn_batch_training:
            self.x = T.tensor3('x')
            self.y = T.tensor3('y')
        else:
            self.x = T.matrix('x')
            self.y = T.matrix('y')

        self.L1_reg = L1_reg
        self.L2_reg = L2_reg

        self.rnn_layers = []
        self.params = []
        self.delta_params = []

        rng = np.random.RandomState(123)

        for i in range(self.n_layers):
            if i == 0:
                input_size = n_in
            else:
                input_size = hidden_layer_size[i - 1]

            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.rnn_layers[i - 1].output
                # bidirectional layers output twice as many features
                if hidden_layer_type[i - 1] == 'BSLSTM' or hidden_layer_type[i - 1] == 'BLSTM':
                    input_size = hidden_layer_size[i - 1] * 2

            if hidden_layer_type[i] in self.list_of_activations:
                hidden_activation = hidden_layer_type[i].lower()
                hidden_layer = GeneralLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                            activation=hidden_activation,
                                            p=self.dropout_rate, training=self.is_train)
            elif hidden_layer_type[i] == 'TANH_LHUC':
                hidden_layer = SigmoidLayer_LHUC(rng, layer_input, input_size, hidden_layer_size[i],
                                                 activation=T.tanh,
                                                 p=self.dropout_rate, training=self.is_train)
            elif hidden_layer_type[i] == 'SLSTM':
                hidden_layer = SimplifiedLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                              p=self.dropout_rate, training=self.is_train,
                                              rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'SGRU':
                hidden_layer = SimplifiedGRU(rng, layer_input, input_size, hidden_layer_size[i],
                                             p=self.dropout_rate, training=self.is_train,
                                             rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'GRU':
                hidden_layer = GatedRecurrentUnit(rng, layer_input, input_size, hidden_layer_size[i],
                                                  p=self.dropout_rate, training=self.is_train,
                                                  rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM_NFG':
                hidden_layer = LstmNFG(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train,
                                       rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM_NOG':
                hidden_layer = LstmNOG(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train,
                                       rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM_NIG':
                hidden_layer = LstmNIG(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train,
                                       rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM_NPH':
                hidden_layer = LstmNoPeepholes(rng, layer_input, input_size, hidden_layer_size[i],
                                               p=self.dropout_rate, training=self.is_train,
                                               rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM':
                hidden_layer = VanillaLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train,
                                           rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'BSLSTM':
                hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                                hidden_layer_size[i],
                                                p=self.dropout_rate, training=self.is_train,
                                                rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'BLSTM':
                hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                               hidden_layer_size[i],
                                               p=self.dropout_rate, training=self.is_train,
                                               rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'RNN':
                hidden_layer = VanillaRNN(rng, layer_input, input_size, hidden_layer_size[i],
                                          p=self.dropout_rate, training=self.is_train,
                                          rnn_batch_training=self.rnn_batch_training)
            elif hidden_layer_type[i] == 'LSTM_LHUC':
                hidden_layer = VanillaLstm_LHUC(rng, layer_input, input_size, hidden_layer_size[i],
                                                p=self.dropout_rate, training=self.is_train,
                                                rnn_batch_training=self.rnn_batch_training)
            else:
                logger.critical(
                    "This hidden layer type: %s is not supported right now! \n"
                    "Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
                sys.exit(1)

            self.rnn_layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)

        input_size = hidden_layer_size[-1]
        if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM':
            input_size = hidden_layer_size[-1] * 2

        # the target vector is split into three blocks:
        # acoustic features, a 3-way speaker-id block, and a 1-dim gender flag
        gender_class_bin = n_out - 1
        acous_feat_begin = 0
        acous_feat_end = n_out - 4
        spkid_feat_begin = acous_feat_end
        spkid_feat_end = acous_feat_end + 3
        gender_feat_begin = spkid_feat_end
        gender_feat_end = spkid_feat_end + 1

        self.final_layer_acous = LinearLayer(rng, self.rnn_layers[-1].output, input_size,
                                             acous_feat_end - acous_feat_begin)
        self.final_layer_spkid = GeneralLayer(rng, self.rnn_layers[-1].output, input_size,
                                              spkid_feat_end - spkid_feat_begin, activation='softmax')
        self.final_layer_gender = SigmoidLayer(rng, self.rnn_layers[-1].output, input_size,
                                               gender_feat_end - gender_feat_begin,
                                               activation=T.nnet.sigmoid)

        output_activation = output_type.lower()
        # if output_activation == 'linear':
        #     self.final_layer_acous = LinearLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out)
        # elif output_activation == 'recurrent':
        #     self.final_layer_acous = RecurrentOutputLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out, rnn_batch_training=self.rnn_batch_training)
        # elif output_type.upper() in self.list_of_activations:
        #     self.final_layer_acous = GeneralLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out, activation=output_activation)
        # else:
        #     logger.critical("This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
        #     sys.exit(1)

        self.params.extend(self.final_layer_acous.params)
        self.params.extend(self.final_layer_spkid.params)
        self.params.extend(self.final_layer_gender.params)

        self.updates = {}
        for param in self.params:
            self.updates[param] = theano.shared(
                value=np.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX),
                name='updates')

        finetune_cost_acous = T.mean(
            T.sum((self.final_layer_acous.output - self.y[:, acous_feat_begin:acous_feat_end]) ** 2, axis=1))
        errors_acous = T.mean(
            T.sum((self.final_layer_acous.output - self.y[:, acous_feat_begin:acous_feat_end]) ** 2, axis=1))

        finetune_cost_spkid = self.categorical_crossentropy_loss(
            self.final_layer_spkid.output, self.y[:, spkid_feat_begin:spkid_feat_end])
        errors_spkid = self.categorical_crossentropy_loss(
            self.final_layer_spkid.output, self.y[:, spkid_feat_begin:spkid_feat_end])
        # finetune_cost_spkid = -1.0 * theano.tensor.log(self.y[:, spkid_feat_begin:spkid_feat_end])
        # for m in self.final_layer_spkid.params:
        #     finetune_cost_spkid += self.L2_reg * (theano.tensor.sqr(param.get_value()).sum())
        # errors_spkid = finetune_cost_spkid.mean()

        finetune_cost_gender = self.final_layer_gender.errors(self.y[:, gender_feat_begin:gender_feat_end])
        errors_acous_gender = self.final_layer_gender.errors(self.y[:, gender_feat_begin:gender_feat_end])

        # combined multi-task cost; T.log keeps the expression symbolic
        self.finetune_cost = finetune_cost_acous - 10 * T.log(finetune_cost_spkid) + finetune_cost_gender
        self.errors = errors_acous - 10 * T.log(errors_spkid) + errors_acous_gender

    def categorical_crossentropy_loss(self, predictions, targets):
        return T.nnet.categorical_crossentropy(predictions, targets).mean()

    def multiclass_hinge_loss(self, predictions, targets, delta=1):
        num_cls = predictions.shape[1]
        if targets.ndim == predictions.ndim - 1:
            targets = T.extra_ops.to_one_hot(targets, num_cls)
        elif targets.ndim != predictions.ndim:
            raise TypeError('rank mismatch between targets and predictions')
        corrects = predictions[targets.nonzero()]
        rest = T.reshape(predictions[(1 - targets).nonzero()], (-1, num_cls - 1))
        rest = T.max(rest, axis=1)
        return T.nnet.relu(rest - corrects + delta).mean()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0):
        """ This function is to build finetune functions and to update gradients

        :param train_shared_xy: theano shared variable for input and output training data
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development
        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

        ## added for LHUC
        if use_lhuc:
            # in LHUC, only the scaling parameters (named 'c') are updated
            self.lhuc_params = []
            for p in self.params:
                if p.name == 'c':
                    self.lhuc_params.append(p)
            params = self.lhuc_params
            gparams = T.grad(cost, params)
        else:
            params = self.params
            gparams = T.grad(cost, params)

        freeze_params = 0
        for layer in range(layer_index):
            freeze_params += len(self.rnn_layers[layer].params)

        # use optimizer
        if self.optimizer == 'sgd':
            # pair each parameter with its gradient
            updates = OrderedDict()
            for i, (param, gparam) in enumerate(zip(params, gparams)):
                weight_update = self.updates[param]
                upd = mom * weight_update - lr * gparam
                updates[weight_update] = upd
                # freeze the first layer_index layers and update the rest
                if i >= freeze_params:
                    updates[param] = param + upd
        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical(
                "This optimizer: %s is not supported right now! \n"
                "Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
            sys.exit(1)

        train_model = theano.function(inputs=[lr, mom],  # index, batch_size
                                      outputs=self.errors,
                                      updates=updates,
                                      givens={self.x: train_set_x,  # [index*batch_size:(index + 1)*batch_size]
                                              self.y: train_set_y,
                                              self.is_train: np.cast['int32'](1)},
                                      on_unused_input='ignore')

        valid_model = theano.function(inputs=[],
                                      outputs=self.errors,
                                      givens={self.x: valid_set_x,
                                              self.y: valid_set_y,
                                              self.is_train: np.cast['int32'](0)},
                                      on_unused_input='ignore')

        return train_model, valid_model

    def parameter_prediction(self, test_set_x):  # , batch_size
        """ This function is to predict the output of NN

        :param test_set_x: input features for a testing sentence
        :type test_set_x: python array variable
        :returns: predicted features
        """

        n_test_set_x = test_set_x.shape[0]

        test_out_acous = theano.function([], self.final_layer_acous.output,
                                         givens={self.x: test_set_x,
                                                 self.is_train: np.cast['int32'](0)},
                                         on_unused_input='ignore')
        test_out_gender = theano.function([], self.final_layer_gender.output,
                                          givens={self.x: test_set_x,
                                                  self.is_train: np.cast['int32'](0)},
                                          on_unused_input='ignore')
        test_out_spkid = theano.function([], self.final_layer_spkid.output,
                                         givens={self.x: test_set_x,
                                                 self.is_train: np.cast['int32'](0)},
                                         on_unused_input='ignore')

        predict_parameter = np.concatenate(
            [test_out_acous(), test_out_gender(), test_out_spkid()], axis=-1)

        return predict_parameter

    ## the function to output activations at a hidden layer
    def generate_hidden_layer(self, test_set_x, bn_layer_index):
        """ This function is to predict the bottleneck features of NN

        :param test_set_x: input features for a testing sentence
        :type test_set_x: python array variable
        :returns: predicted bottleneck features
        """

        n_test_set_x = test_set_x.shape[0]

        test_out = theano.function([], self.rnn_layers[bn_layer_index].output,
                                   givens={self.x: test_set_x,
                                           self.is_train: np.cast['int32'](0)},
                                   on_unused_input='ignore')

        predict_parameter = test_out()

        return predict_parameter
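# A usage sketch for this class (not part of the original file). The
# hyperparameters and the train_x/train_y/valid_x/valid_y matrices are
# assumptions: float32 numpy arrays with n_in input and n_out target columns.
#
#     net = DeepRecurrentNetwork(n_in=425, hidden_layer_size=[512, 512],
#                                n_out=191, L1_reg=0.0, L2_reg=1e-5,
#                                hidden_layer_type=['TANH', 'LSTM'],
#                                optimizer='sgd')
#     train_xy = (theano.shared(train_x), theano.shared(train_y))
#     valid_xy = (theano.shared(valid_x), theano.shared(valid_y))
#     train_fn, valid_fn = net.build_finetune_functions(train_xy, valid_xy)
#     for epoch in range(10):
#         train_err = train_fn(0.002, 0.9)   # learning rate, momentum
#         valid_err = valid_fn()
#     predicted = net.parameter_prediction(theano.shared(valid_x))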
def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', dropout_rate=0.0,
             optimizer='sgd', loss_function='MMSE', rnn_batch_training=False):
    """ This function initialises a neural network

    :param n_in: Dimensionality of input features
    :type n_in: Integer
    :param hidden_layer_size: The layer size for each hidden layer
    :type hidden_layer_size: A list of integers
    :param n_out: Dimensionality of output features
    :type n_out: Integer
    :param hidden_layer_type: the activation type of each hidden layer, e.g., TANH, LSTM, GRU, BLSTM
    :param L1_reg: the L1 regularisation weight
    :param L2_reg: the L2 regularisation weight
    :param output_type: the activation type of the output layer; by default 'LINEAR', i.e. linear regression.
    :param dropout_rate: probability of dropout, a float number between 0 and 1.
    """

    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)

    self.n_layers = len(hidden_layer_size)

    self.dropout_rate = dropout_rate
    self.optimizer = optimizer
    self.loss_function = loss_function
    self.is_train = T.iscalar('is_train')
    self.rnn_batch_training = rnn_batch_training

    assert len(hidden_layer_size) == len(hidden_layer_type)

    self.list_of_activations = ['TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU']

    if self.rnn_batch_training:
        self.x = T.tensor3('x')
        self.y = T.tensor3('y')
    else:
        self.x = T.matrix('x')
        self.y = T.matrix('y')

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.rnn_layers = []
    self.params = []
    self.delta_params = []

    rng = np.random.RandomState(123)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_size[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.rnn_layers[i - 1].output
            # bidirectional layers output twice as many features
            if hidden_layer_type[i - 1] == 'BSLSTM' or hidden_layer_type[i - 1] == 'BLSTM':
                input_size = hidden_layer_size[i - 1] * 2

        if hidden_layer_type[i] in self.list_of_activations:
            hidden_activation = hidden_layer_type[i].lower()
            hidden_layer = GeneralLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=hidden_activation,
                                        p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'TANH_LHUC':
            hidden_layer = SigmoidLayer_LHUC(rng, layer_input, input_size, hidden_layer_size[i],
                                             activation=T.tanh,
                                             p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SLSTM':
            hidden_layer = SimplifiedLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                          p=self.dropout_rate, training=self.is_train,
                                          rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'SGRU':
            hidden_layer = SimplifiedGRU(rng, layer_input, input_size, hidden_layer_size[i],
                                         p=self.dropout_rate, training=self.is_train,
                                         rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'GRU':
            hidden_layer = GatedRecurrentUnit(rng, layer_input, input_size, hidden_layer_size[i],
                                              p=self.dropout_rate, training=self.is_train,
                                              rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NFG':
            hidden_layer = LstmNFG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train,
                                   rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NOG':
            hidden_layer = LstmNOG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train,
                                   rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NIG':
            hidden_layer = LstmNIG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train,
                                   rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NPH':
            hidden_layer = LstmNoPeepholes(rng, layer_input, input_size, hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train,
                                           rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM':
            hidden_layer = VanillaLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train,
                                       rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BSLSTM':
            hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                            hidden_layer_size[i],
                                            p=self.dropout_rate, training=self.is_train,
                                            rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BLSTM':
            hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                           hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train,
                                           rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'RNN':
            hidden_layer = VanillaRNN(rng, layer_input, input_size, hidden_layer_size[i],
                                      p=self.dropout_rate, training=self.is_train,
                                      rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_LHUC':
            hidden_layer = VanillaLstm_LHUC(rng, layer_input, input_size, hidden_layer_size[i],
                                            p=self.dropout_rate, training=self.is_train,
                                            rnn_batch_training=self.rnn_batch_training)
        else:
            logger.critical(
                "This hidden layer type: %s is not supported right now! \n"
                "Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
            sys.exit(1)

        self.rnn_layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)

    input_size = hidden_layer_size[-1]
    if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM':
        input_size = hidden_layer_size[-1] * 2

    # the target vector is split into three blocks:
    # acoustic features, a 3-way speaker-id block, and a 1-dim gender flag
    gender_class_bin = n_out - 1
    acous_feat_begin = 0
    acous_feat_end = n_out - 4
    spkid_feat_begin = acous_feat_end
    spkid_feat_end = acous_feat_end + 3
    gender_feat_begin = spkid_feat_end
    gender_feat_end = spkid_feat_end + 1

    self.final_layer_acous = LinearLayer(rng, self.rnn_layers[-1].output, input_size,
                                         acous_feat_end - acous_feat_begin)
    self.final_layer_spkid = GeneralLayer(rng, self.rnn_layers[-1].output, input_size,
                                          spkid_feat_end - spkid_feat_begin, activation='softmax')
    self.final_layer_gender = SigmoidLayer(rng, self.rnn_layers[-1].output, input_size,
                                           gender_feat_end - gender_feat_begin,
                                           activation=T.nnet.sigmoid)

    output_activation = output_type.lower()
    # if output_activation == 'linear':
    #     self.final_layer_acous = LinearLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out)
    # elif output_activation == 'recurrent':
    #     self.final_layer_acous = RecurrentOutputLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out, rnn_batch_training=self.rnn_batch_training)
    # elif output_type.upper() in self.list_of_activations:
    #     self.final_layer_acous = GeneralLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out, activation=output_activation)
    # else:
    #     logger.critical("This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
    #     sys.exit(1)

    self.params.extend(self.final_layer_acous.params)
    self.params.extend(self.final_layer_spkid.params)
    self.params.extend(self.final_layer_gender.params)

    self.updates = {}
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape, dtype=theano.config.floatX),
            name='updates')

    finetune_cost_acous = T.mean(
        T.sum((self.final_layer_acous.output - self.y[:, acous_feat_begin:acous_feat_end]) ** 2, axis=1))
    errors_acous = T.mean(
        T.sum((self.final_layer_acous.output - self.y[:, acous_feat_begin:acous_feat_end]) ** 2, axis=1))

    finetune_cost_spkid = self.categorical_crossentropy_loss(
        self.final_layer_spkid.output, self.y[:, spkid_feat_begin:spkid_feat_end])
    errors_spkid = self.categorical_crossentropy_loss(
        self.final_layer_spkid.output, self.y[:, spkid_feat_begin:spkid_feat_end])
    # finetune_cost_spkid = -1.0 * theano.tensor.log(self.y[:, spkid_feat_begin:spkid_feat_end])
    # for m in self.final_layer_spkid.params:
    #     finetune_cost_spkid += self.L2_reg * (theano.tensor.sqr(param.get_value()).sum())
    # errors_spkid = finetune_cost_spkid.mean()

    finetune_cost_gender = self.final_layer_gender.errors(self.y[:, gender_feat_begin:gender_feat_end])
    errors_acous_gender = self.final_layer_gender.errors(self.y[:, gender_feat_begin:gender_feat_end])

    # combined multi-task cost; T.log keeps the expression symbolic
    self.finetune_cost = finetune_cost_acous - 10 * T.log(finetune_cost_spkid) + finetune_cost_gender
    self.errors = errors_acous - 10 * T.log(errors_spkid) + errors_acous_gender
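# A standalone sketch (not part of the original file) of how one target row is
# laid out for the three output heads above, assuming n_out = 8 for brevity:
# n_out - 4 acoustic values, a 3-way speaker-id one-hot block, and one gender bit.
#
#     import numpy as np
#     n_out = 8
#     y_row = np.asarray([0.1, 0.2, 0.3, 0.4,   # acoustic features [0:n_out-4]
#                         0.0, 1.0, 0.0,        # speaker-id one-hot [n_out-4:n_out-1]
#                         1.0])                 # gender flag [n_out-1:n_out]
#     acous = y_row[:n_out - 4]
#     spkid = y_row[n_out - 4:n_out - 1]
#     gender = y_row[n_out - 1:]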
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, n_outs=10,
             l1_reg=None, l2_reg=None, hidden_layers_sizes=[500, 500],
             hidden_activation='tanh', output_activation='linear',
             use_rprop=0, rprop_init_update=0.001):

    logger = logging.getLogger("DNN initialization")

    self.sigmoid_layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = len(hidden_layers_sizes)

    self.output_activation = output_activation

    self.use_rprop = use_rprop
    self.rprop_init_update = rprop_init_update

    self.l1_reg = l1_reg
    self.l2_reg = l2_reg

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.matrix('y')

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.tanh)  ## T.nnet.sigmoid
        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # add final layer
    if self.output_activation == 'linear':
        self.final_layer = LinearLayer(rng=numpy_rng,
                                       input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    elif self.output_activation == 'sigmoid':
        self.final_layer = SigmoidLayer(rng=numpy_rng,
                                        input=self.sigmoid_layers[-1].output,
                                        n_in=hidden_layers_sizes[-1],
                                        n_out=n_outs,
                                        activation=T.nnet.sigmoid)
    else:
        logger.critical("This output activation function: %s is not supported right now!"
                        % (self.output_activation))
        sys.exit(1)

    self.params.extend(self.final_layer.params)
    self.delta_params.extend(self.final_layer.delta_params)

    ### MSE
    self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y)
                                      * (self.final_layer.output - self.y), axis=1))
    self.errors = T.mean(T.sum((self.final_layer.output - self.y)
                               * (self.final_layer.output - self.y), axis=1))

    ### L1-norm
    if self.l1_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l1_reg * (abs(W).sum())

    ### L2-norm
    if self.l2_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
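# A usage sketch (not part of the original file); the class name DNN and the
# feature sizes are assumptions. The regularisation loops above rely on each
# hidden layer contributing its parameters as [W, b], so self.params[i * 2]
# picks the weight matrix of layer i.
#
#     numpy_rng = np.random.RandomState(123)
#     dnn = DNN(numpy_rng, n_ins=425, n_outs=187,
#               l1_reg=None, l2_reg=1e-5,
#               hidden_layers_sizes=[512, 512, 512],
#               output_activation='linear')
#     cost = dnn.finetune_cost   # symbolic MSE plus the L2 penalty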