def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', dropout_rate=0.0):
    """Initialise a (possibly recurrent) neural network and build its Theano graph.

    :param n_in: Dimensionality of input features
    :type n_in: Integer
    :param hidden_layer_size: The layer size for each hidden layer
    :type hidden_layer_size: A list of integers
    :param n_out: Dimensionality of output features
    :type n_out: Integer
    :param hidden_layer_type: the activation types of each hidden layer,
        e.g., TANH, LSTM, GRU, BLSTM (one entry per hidden layer)
    :param L1_reg: the L1 regularisation weight
    :param L2_reg: the L2 regularisation weight
    :param output_type: the activation type of the output layer; by default
        'LINEAR', i.e. linear regression.
    :param dropout_rate: probability of dropout, a float number between 0 and 1.
    """
    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.n_layers = len(hidden_layer_size)
    self.dropout_rate = dropout_rate
    # Symbolic train/inference switch consumed by dropout-aware layers.
    self.is_train = T.iscalar('is_train')

    # One activation type must be given per hidden layer.
    assert len(hidden_layer_size) == len(hidden_layer_type)

    self.x = T.matrix('x')
    self.y = T.matrix('y')

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.rnn_layers = []
    self.params = []
    self.delta_params = []

    # Fixed seed so weight initialisation is reproducible across runs.
    rng = np.random.RandomState(123)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_size[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.rnn_layers[i - 1].output
            # Bidirectional layers concatenate forward+backward outputs,
            # so the next layer sees twice the nominal width.
            if hidden_layer_type[i - 1] == 'BSLSTM' or hidden_layer_type[i - 1] == 'BLSTM':
                input_size = hidden_layer_size[i - 1] * 2

        # Dispatch on the requested layer type.
        if hidden_layer_type[i] == 'SLSTM':
            hidden_layer = SimplifiedLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                          p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SGRU':
            hidden_layer = SimplifiedGRU(rng, layer_input, input_size, hidden_layer_size[i],
                                         p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'GRU':
            hidden_layer = GatedRecurrentUnit(rng, layer_input, input_size, hidden_layer_size[i],
                                              p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NFG':
            hidden_layer = LstmNFG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NOG':
            hidden_layer = LstmNOG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NIG':
            hidden_layer = LstmNIG(rng, layer_input, input_size, hidden_layer_size[i],
                                   p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM_NPH':
            hidden_layer = LstmNoPeepholes(rng, layer_input, input_size, hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'LSTM':
            hidden_layer = VanillaLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                       p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'BSLSTM':
            # Bidirectional layers take the per-direction size twice.
            hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                            hidden_layer_size[i],
                                            p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'BLSTM':
            hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i],
                                           hidden_layer_size[i],
                                           p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'RNN':
            hidden_layer = VanillaRNN(rng, layer_input, input_size, hidden_layer_size[i],
                                      p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'TANH':
            hidden_layer = SigmoidLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=T.tanh,
                                        p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SIGMOID':
            hidden_layer = SigmoidLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=T.nnet.sigmoid,
                                        p=self.dropout_rate, training=self.is_train)
        else:
            logger.critical(
                "This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
            sys.exit(1)

        self.rnn_layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)

    # Width of the input to the output layer; doubled again for
    # bidirectional final hidden layers.
    input_size = hidden_layer_size[-1]
    if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM':
        input_size = hidden_layer_size[-1] * 2

    if output_type == 'LINEAR':
        self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output, input_size, self.n_out)
    else:
        logger.critical(
            "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
        sys.exit(1)

    self.params.extend(self.final_layer.params)

    # Shared accumulators (one per parameter) used by momentum-style updates.
    self.updates = {}
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    # Mean (over frames) of the summed squared error per frame.
    self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))
    self.errors = T.mean(T.sum((self.final_layer.output - self.y) ** 2, axis=1))
def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', dropout_rate=0.0,
             optimizer='sgd', loss_function='MMSE', rnn_batch_training=False):
    """Initialise a recurrent neural network and build its Theano graph.

    :param n_in: Dimensionality of input features
    :type n_in: Integer
    :param hidden_layer_size: The layer size for each hidden layer
    :type hidden_layer_size: A list of integers
    :param n_out: Dimensionality of output features
    :type n_out: Integer
    :param hidden_layer_type: the activation types of each hidden layer,
        e.g., TANH, LSTM, GRU, BLSTM (one entry per hidden layer)
    :param L1_reg: the L1 regularisation weight
    :param L2_reg: the L2 regularisation weight
    :param output_type: the activation type of the output layer; by default
        'LINEAR', i.e. linear regression.
    :param dropout_rate: probability of dropout, a float number between 0 and 1.
    :param optimizer: optimiser name stored for later use (e.g. 'sgd').
    :param loss_function: 'CCE', 'Hinge' or 'MMSE' — selects the cost built below.
    :param rnn_batch_training: if True, inputs/targets are rank-3 tensors
        (presumably (seq, batch, feature) — TODO confirm against the trainer).
    """
    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.n_layers = len(hidden_layer_size)
    self.dropout_rate = dropout_rate
    self.optimizer = optimizer
    self.loss_function = loss_function
    # Symbolic train/inference switch consumed by dropout-aware layers.
    self.is_train = T.iscalar('is_train')
    self.rnn_batch_training = rnn_batch_training

    # One activation type must be given per hidden layer.
    assert len(hidden_layer_size) == len(hidden_layer_type)

    # Simple feed-forward activations handled by GeneralLayer.
    self.list_of_activations = [
        'TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU'
    ]

    # Batch training uses rank-3 tensors; otherwise one matrix per utterance.
    if self.rnn_batch_training:
        self.x = T.tensor3('x')
        self.y = T.tensor3('y')
    else:
        self.x = T.matrix('x')
        self.y = T.matrix('y')

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.rnn_layers = []
    self.params = []
    self.delta_params = []

    # Fixed seed so weight initialisation is reproducible across runs.
    rng = np.random.RandomState(123)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_size[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.rnn_layers[i - 1].output
            # Bidirectional layers concatenate forward+backward outputs,
            # so the next layer sees twice the nominal width.
            if hidden_layer_type[i - 1] == 'BSLSTM' or hidden_layer_type[i - 1] == 'BLSTM':
                input_size = hidden_layer_size[i - 1] * 2

        # Dispatch on the requested layer type.
        if hidden_layer_type[i] in self.list_of_activations:
            hidden_activation = hidden_layer_type[i].lower()
            hidden_layer = GeneralLayer(rng, layer_input, input_size, hidden_layer_size[i],
                                        activation=hidden_activation,
                                        p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'TANH_LHUC':
            hidden_layer = SigmoidLayer_LHUC(rng, layer_input, input_size, hidden_layer_size[i],
                                             activation=T.tanh,
                                             p=self.dropout_rate, training=self.is_train)
        elif hidden_layer_type[i] == 'SLSTM':
            hidden_layer = SimplifiedLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'SGRU':
            hidden_layer = SimplifiedGRU(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'GRU':
            hidden_layer = GatedRecurrentUnit(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NFG':
            hidden_layer = LstmNFG(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NOG':
            hidden_layer = LstmNOG(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NIG':
            hidden_layer = LstmNIG(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_NPH':
            hidden_layer = LstmNoPeepholes(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM':
            hidden_layer = VanillaLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BSLSTM':
            # Bidirectional layers take the per-direction size twice.
            hidden_layer = BidirectionSLstm(
                rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BLSTM':
            hidden_layer = BidirectionLstm(
                rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'RNN':
            hidden_layer = VanillaRNN(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_LHUC':
            hidden_layer = VanillaLstm_LHUC(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        else:
            logger.critical(
                "This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
            sys.exit(1)

        self.rnn_layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)

    # Width of the input to the output layer; doubled for bidirectional
    # final hidden layers.
    input_size = hidden_layer_size[-1]
    if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM':
        input_size = hidden_layer_size[-1] * 2

    output_activation = output_type.lower()
    if output_activation == 'linear':
        self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output,
                                       input_size, self.n_out)
    elif output_activation == 'recurrent':
        self.final_layer = RecurrentOutputLayer(
            rng, self.rnn_layers[-1].output, input_size, self.n_out,
            rnn_batch_training=self.rnn_batch_training)
    elif output_type.upper() in self.list_of_activations:
        self.final_layer = GeneralLayer(rng, self.rnn_layers[-1].output,
                                        input_size, self.n_out,
                                        activation=output_activation)
    else:
        logger.critical(
            "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
        sys.exit(1)

    self.params.extend(self.final_layer.params)

    # Shared accumulators (one per parameter) used by momentum-style updates.
    self.updates = {}
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    # Build the training cost according to the selected loss function.
    # NOTE(review): categorical_crossentropy_loss / multiclass_hinge_loss
    # are methods defined elsewhere on this class.
    if self.loss_function == 'CCE':
        self.finetune_cost = self.categorical_crossentropy_loss(
            self.final_layer.output, self.y)
        self.errors = self.categorical_crossentropy_loss(
            self.final_layer.output, self.y)
    elif self.loss_function == 'Hinge':
        self.finetune_cost = self.multiclass_hinge_loss(
            self.final_layer.output, self.y)
        self.errors = self.multiclass_hinge_loss(self.final_layer.output,
                                                 self.y)
    elif self.loss_function == 'MMSE':
        if self.rnn_batch_training:
            # Flatten the rank-3 tensors to (frames, n_out) and drop
            # all-zero target rows (zero-padding from batching).
            self.y_mod = T.reshape(self.y, (-1, n_out))
            self.final_layer_output = T.reshape(self.final_layer.output,
                                                (-1, n_out))
            nonzero_rows = T.any(self.y_mod, 1).nonzero()
            self.y_mod = self.y_mod[nonzero_rows]
            self.final_layer_output = self.final_layer_output[nonzero_rows]
            self.finetune_cost = T.mean(
                T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1))
            self.errors = T.mean(
                T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1))
        else:
            # Mean (over frames) of the summed squared error per frame.
            self.finetune_cost = T.mean(
                T.sum((self.final_layer.output - self.y) ** 2, axis=1))
            self.errors = T.mean(
                T.sum((self.final_layer.output - self.y) ** 2, axis=1))
def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg,
             hidden_layer_type, output_type='LINEAR', network_type='S2S',
             ed_type='HED', dropout_rate=0.0, optimizer='sgd',
             MLU_div_lengths=[], loss_function='MMSE',
             rnn_batch_training=False):
    """Initialise a sequence-to-sequence (encoder-decoder) network.

    :param n_in: Dimensionality of input features
    :type n_in: Integer
    :param hidden_layer_size: The layer size for each hidden layer
    :type hidden_layer_size: A list of integers
    :param n_out: Dimensionality of output features
    :type n_out: Integer
    :param hidden_layer_type: the activation types of each hidden layer,
        e.g., TANH, LSTM, GRU, BLSTM; encoder variants end in 'E'
        (e.g. LSTME) and decoder variants in 'D' (e.g. LSTMD).
    :param L1_reg: the L1 regularisation weight
    :param L2_reg: the L2 regularisation weight
    :param output_type: the activation type of the output layer; by default
        'LINEAR', i.e. linear regression.
    :param network_type: 'S2S' enables the duration/frame-feature inputs.
    :param ed_type: 'VED' (vanilla) or 'HED' (hierarchical) encoder-decoder.
    :param dropout_rate: probability of dropout, a float number between 0 and 1.
    :param MLU_div_lengths: boundary offsets into the frame-feature matrix,
        one slice per encoder (presumably the multi-level-unit division —
        TODO confirm against the caller).
    :param loss_function: 'CCE', 'Hinge' or 'MMSE' — selects the cost built below.
    :param rnn_batch_training: if True, inputs/targets are rank-3 tensors.
    """
    logger = logging.getLogger("DNN initialization")

    self.n_in = int(n_in)
    self.n_out = int(n_out)
    self.n_layers = len(hidden_layer_size)
    self.dropout_rate = dropout_rate
    self.optimizer = optimizer
    self.loss_function = loss_function
    # Symbolic train/inference switch consumed by dropout-aware layers.
    self.is_train = T.iscalar('is_train')
    self.rnn_batch_training = rnn_batch_training

    # One activation type must be given per hidden layer.
    assert len(hidden_layer_size) == len(hidden_layer_type)

    # Simple feed-forward activations handled by GeneralLayer.
    self.list_of_activations = [
        'TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU'
    ]

    # Layer-type families: bidirectional (double output width),
    # encoders (feed a sequence encoder) and decoders (own output layer).
    BLSTM_variants = ['BLSTM', 'BSLSTM', 'BLSTME', 'BSLSTME']
    Encoder_variants = ['RNNE', 'LSTME', 'BLSTME', 'SLSTME', 'TANHE']
    Decoder_variants = ['RNND', 'LSTMD', 'SLSTMD']

    # Batch training uses rank-3 tensors; otherwise one matrix per utterance.
    if self.rnn_batch_training:
        self.x = T.tensor3('x')
        self.y = T.tensor3('y')
    else:
        self.x = T.matrix('x')
        self.y = T.matrix('y')

    if network_type == "S2S":
        # d: per-segment durations; f: frame-level features.
        self.d = T.ivector('d')
        self.f = T.matrix('f')

    self.L1_reg = L1_reg
    self.L2_reg = L2_reg

    self.rnn_layers = []
    self.params = []
    self.delta_params = []

    # Fixed seed so weight initialisation is reproducible across runs.
    rng = np.random.RandomState(123)

    # Bookkeeping while walking through stacked encoders.
    prev_seg_end = 0
    encoder_count = 0
    MLU_div = MLU_div_lengths

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_in
        else:
            input_size = hidden_layer_size[i - 1]
            # Bidirectional layers concatenate forward+backward outputs.
            if hidden_layer_type[i - 1] in BLSTM_variants:
                input_size = hidden_layer_size[i - 1] * 2

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.rnn_layers[i - 1].output

        ### sequence-to-sequence mapping ###
        # NOTE(review): at i == 0 this inspects hidden_layer_type[-1];
        # with the expected configurations the last layer is a decoder,
        # so the branch is not taken on the first iteration.
        if hidden_layer_type[i - 1] in Encoder_variants:
            dur_input = self.d
            frame_feat_input = self.f

            # vanilla encoder-decoder (phone-level features)
            if ed_type == "VED":
                seq2seq_model = DistributedSequenceEncoder(
                    rng, layer_input, dur_input)
                # Append 4 frame features to each upsampled encoding.
                layer_input = T.concatenate(
                    (seq2seq_model.encoded_output, frame_feat_input), axis=1)
                input_size = input_size + 4

            # hierarchical encoder-decoder
            elif ed_type == "HED":
                # Number of segments consumed by this encoder level.
                seg_len = layer_input.size // input_size
                seg_dur_input = dur_input[prev_seg_end:prev_seg_end + seg_len]
                num_of_segs = T.sum(seg_dur_input)
                seq2seq_model = DistributedSequenceEncoder(
                    rng, layer_input, seg_dur_input)
                # Slice the feature columns assigned to this encoder level.
                addfeat_input = frame_feat_input[
                    0:num_of_segs,
                    MLU_div[encoder_count]:MLU_div[encoder_count + 1]]
                layer_input = T.concatenate(
                    (seq2seq_model.encoded_output, addfeat_input), axis=1)
                input_size = input_size + (MLU_div[encoder_count + 1] -
                                           MLU_div[encoder_count])
                prev_seg_end = prev_seg_end + seg_len
                encoder_count = encoder_count + 1

        # hidden layer activation
        if hidden_layer_type[i] in self.list_of_activations:
            hidden_activation = hidden_layer_type[i].lower()
            hidden_layer = GeneralLayer(rng, layer_input, input_size,
                                        hidden_layer_size[i],
                                        activation=hidden_activation,
                                        p=self.dropout_rate,
                                        training=self.is_train)
        elif hidden_layer_type[i] == 'TANHE' or hidden_layer_type[i] == 'SIGMOIDE':
            # Encoder feed-forward variants: strip the trailing 'E'.
            hidden_activation = hidden_layer_type[i][0:-1].lower()
            hidden_layer = GeneralLayer(rng, layer_input, input_size,
                                        hidden_layer_size[i],
                                        activation=hidden_activation,
                                        p=self.dropout_rate,
                                        training=self.is_train)
        elif hidden_layer_type[i] == 'TANH_LHUC':
            hidden_layer = SigmoidLayer_LHUC(rng, layer_input, input_size,
                                             hidden_layer_size[i],
                                             activation=T.tanh,
                                             p=self.dropout_rate,
                                             training=self.is_train)
        elif hidden_layer_type[i] == 'SLSTM' or hidden_layer_type[i] == 'SLSTME':
            hidden_layer = SimplifiedLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'SLSTMD':
            # Decoder layers also receive the network output size.
            hidden_layer = SimplifiedLstmDecoder(
                rng, layer_input, input_size, hidden_layer_size[i], self.n_out,
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'SGRU':
            hidden_layer = SimplifiedGRU(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'GRU':
            hidden_layer = GatedRecurrentUnit(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM' or hidden_layer_type[i] == 'LSTME':
            hidden_layer = VanillaLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTMD':
            hidden_layer = VanillaLstmDecoder(
                rng, layer_input, input_size, hidden_layer_size[i], self.n_out,
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BSLSTM' or hidden_layer_type[i] == 'BSLSTME':
            hidden_layer = BidirectionSLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'BLSTM' or hidden_layer_type[i] == 'BLSTME':
            hidden_layer = BidirectionLstm(
                rng, layer_input, input_size, hidden_layer_size[i],
                hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'RNN' or hidden_layer_type[i] == 'RNNE':
            hidden_layer = VanillaRNN(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'RNND':
            hidden_layer = VanillaRNNDecoder(
                rng, layer_input, input_size, hidden_layer_size[i], self.n_out,
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        elif hidden_layer_type[i] == 'LSTM_LHUC':
            hidden_layer = VanillaLstm_LHUC(
                rng, layer_input, input_size, hidden_layer_size[i],
                p=self.dropout_rate, training=self.is_train,
                rnn_batch_training=self.rnn_batch_training)
        else:
            logger.critical(
                "This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % (hidden_layer_type[i]))
            sys.exit(1)

        self.rnn_layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)

    # Width of the input to the output layer; doubled for bidirectional
    # final hidden layers.
    input_size = hidden_layer_size[-1]
    if hidden_layer_type[-1] in BLSTM_variants:
        input_size = hidden_layer_size[-1] * 2

    if hidden_layer_type[-1] in Decoder_variants:
        # Decoder layers already project to n_out; reuse as final layer.
        self.final_layer = self.rnn_layers[-1]
    else:
        output_activation = output_type.lower()
        if output_activation == 'linear':
            self.final_layer = LinearLayer(rng, self.rnn_layers[-1].output,
                                           input_size, self.n_out)
        elif output_activation == 'recurrent':
            self.final_layer = RecurrentOutputLayer(
                rng, self.rnn_layers[-1].output, input_size, self.n_out,
                rnn_batch_training=self.rnn_batch_training)
        elif output_type.upper() in self.list_of_activations:
            self.final_layer = GeneralLayer(rng, self.rnn_layers[-1].output,
                                            input_size, self.n_out,
                                            activation=output_activation)
        else:
            logger.critical(
                "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type))
            sys.exit(1)

    self.params.extend(self.final_layer.params)

    # Shared accumulators (one per parameter) used by momentum-style updates.
    self.updates = {}
    for param in self.params:
        self.updates[param] = theano.shared(
            value=np.zeros(param.get_value(borrow=True).shape,
                           dtype=theano.config.floatX),
            name='updates')

    # Build the training cost according to the selected loss function.
    # NOTE(review): categorical_crossentropy_loss / multiclass_hinge_loss
    # are methods defined elsewhere on this class.
    if self.loss_function == 'CCE':
        self.finetune_cost = self.categorical_crossentropy_loss(
            self.final_layer.output, self.y)
        self.errors = self.categorical_crossentropy_loss(
            self.final_layer.output, self.y)
    elif self.loss_function == 'Hinge':
        self.finetune_cost = self.multiclass_hinge_loss(
            self.final_layer.output, self.y)
        self.errors = self.multiclass_hinge_loss(self.final_layer.output,
                                                 self.y)
    elif self.loss_function == 'MMSE':
        if self.rnn_batch_training:
            # Flatten the rank-3 tensors to (frames, n_out) and drop
            # all-zero target rows (zero-padding from batching).
            self.y_mod = T.reshape(self.y, (-1, n_out))
            self.final_layer_output = T.reshape(self.final_layer.output,
                                                (-1, n_out))
            nonzero_rows = T.any(self.y_mod, 1).nonzero()
            self.y_mod = self.y_mod[nonzero_rows]
            self.final_layer_output = self.final_layer_output[nonzero_rows]
            self.finetune_cost = T.mean(
                T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1))
            self.errors = T.mean(
                T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1))
        else:
            # Mean (over frames) of the summed squared error per frame.
            self.finetune_cost = T.mean(
                T.sum((self.final_layer.output - self.y) ** 2, axis=1))
            self.errors = T.mean(
                T.sum((self.final_layer.output - self.y) ** 2, axis=1))
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, n_outs=10,
             l1_reg=None, l2_reg=None, hidden_layers_sizes=[500, 500],
             hidden_activation='tanh', output_activation='linear',
             projection_insize=100, projection_outsize=10,
             first_layer_split=True, expand_by_minibatch=False,
             initial_projection_distrib='gaussian',
             use_rprop=0, rprop_init_update=0.001):
    """Initialise a token-projection DNN (TP-DNN) and build its Theano graph.

    A learned projection maps token-identity inputs (e.g. one-hot or
    pre-expanded blocks) to a small continuous space that is concatenated
    with the acoustic/linguistic input before the hidden stack.

    :param numpy_rng: numpy RandomState used to initialise weights.
    :param theano_rng: optional Theano RandomStreams; seeded from numpy_rng
        when not supplied.
    :param n_ins: dimensionality of the main (non-projected) input.
    :param n_outs: dimensionality of the output features.
    :param l1_reg: L1 regularisation weight (unused here; stored for callers).
    :param l2_reg: L2 regularisation weight (stored for callers).
    :param hidden_layers_sizes: sizes of the hidden layers.
        NOTE: mutable default kept for interface compatibility; it is
        never mutated in this method.
    :param hidden_activation: stored hint only — the layers below are
        hard-wired to tanh.
    :param output_activation: 'linear' or 'sigmoid'.
    :param projection_insize: number of distinct token slots feeding the
        projection layer.
    :param projection_outsize: dimensionality of the learned projection.
    :param first_layer_split: if True, the first hidden layer treats the
        main input and the projection separately (SplitHiddenLayer).
    :param expand_by_minibatch: if True, x_proj holds integer token indexes
        that are one-hot expanded symbolically per minibatch.
    :param initial_projection_distrib: initial distribution for the
        projection weights (e.g. 'gaussian').
    :param use_rprop: stored flag for the Rprop optimiser.
    :param rprop_init_update: initial Rprop step size.
    """
    # Beginning at label index 1, blocks of inputs are projected to
    # projection_outsize dims (see TokenProjectionLayer).
    logger = logging.getLogger("TP-DNN initialization")

    self.projection_insize = projection_insize
    self.projection_outsize = projection_outsize

    self.sigmoid_layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = len(hidden_layers_sizes)

    self.output_activation = output_activation
    self.use_rprop = use_rprop
    self.rprop_init_update = rprop_init_update

    self.l1_reg = l1_reg
    self.l2_reg = l2_reg

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
    self.numpy_rng = numpy_rng

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    if expand_by_minibatch:
        # Integer token indexes, expanded to one-hot symbolically below.
        self.x_proj = T.ivector('x_proj')
    else:
        self.x_proj = T.matrix('x_proj')
    self.y = T.matrix('y')

    if expand_by_minibatch:
        # Build a one-hot matrix row per token index.
        z = theano.tensor.zeros((self.x_proj.shape[0], self.projection_insize))
        indexes = self.x_proj
        one_hot = theano.tensor.set_subtensor(
            z[theano.tensor.arange(self.x_proj.shape[0]), indexes], 1)
        projection_input = one_hot
    else:
        projection_input = self.x_proj

    ## Make projection layer
    self.projection_layer = TokenProjectionLayer(
        rng=numpy_rng,
        input=projection_input,
        projection_insize=self.projection_insize,
        projection_outsize=self.projection_outsize,
        initial_projection_distrib=initial_projection_distrib)

    self.params.extend(self.projection_layer.params)
    self.delta_params.extend(self.projection_layer.delta_params)

    # Concatenate the main input with the learned projection.
    first_layer_input = T.concatenate([self.x, self.projection_layer.output],
                                      axis=1)

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins + self.projection_outsize
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = first_layer_input
        else:
            layer_input = self.sigmoid_layers[-1].output

        if i == 0 and first_layer_split:
            # First layer keeps separate weights for the main input and
            # the projection block.
            sigmoid_layer = SplitHiddenLayer(rng=numpy_rng,
                                             input=layer_input,
                                             n_in1=n_ins,
                                             n_in2=self.projection_outsize,
                                             n_out=hidden_layers_sizes[i],
                                             activation=T.tanh)
        else:
            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.tanh)

        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # add final layer
    if self.output_activation == 'linear':
        self.final_layer = LinearLayer(rng=numpy_rng,
                                       input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    elif self.output_activation == 'sigmoid':
        self.final_layer = SigmoidLayer(rng=numpy_rng,
                                        input=self.sigmoid_layers[-1].output,
                                        n_in=hidden_layers_sizes[-1],
                                        n_out=n_outs,
                                        activation=T.nnet.sigmoid)
    else:
        logger.critical("This output activation function: %s is not supported right now!" % (self.output_activation))
        sys.exit(1)

    self.params.extend(self.final_layer.params)
    self.delta_params.extend(self.final_layer.delta_params)

    ## params for 2 hidden layers, projection, first split layer, will look like this:
    ## [W_proj; W_1a, W_1b, b_1; W_2 b_2; W_o, b_o]

    ### MSE: mean (over frames) of the summed squared error per frame.
    self.finetune_cost = T.mean(T.sum((self.final_layer.output - self.y) ** 2,
                                      axis=1))
    self.errors = T.mean(T.sum((self.final_layer.output - self.y) ** 2,
                               axis=1))
def __init__(self, numpy_rng, theano_rng=None, n_ins=784, n_outs=10,
             l1_reg=None, l2_reg=None, hidden_layers_sizes=[500, 500],
             hidden_activation='tanh', output_activation='linear',
             use_rprop=0, rprop_init_update=0.001):
    """Initialise a plain feed-forward DNN and build its Theano graph.

    :param numpy_rng: numpy RandomState used to initialise weights.
    :param theano_rng: optional Theano RandomStreams; seeded from numpy_rng
        when not supplied.
    :param n_ins: dimensionality of the input features.
    :param n_outs: dimensionality of the output features.
    :param l1_reg: L1 regularisation weight, or None to disable.
    :param l2_reg: L2 regularisation weight, or None to disable.
    :param hidden_layers_sizes: sizes of the hidden layers.
        NOTE: mutable default kept for interface compatibility; it is
        never mutated in this method.
    :param hidden_activation: stored hint only — the layers below are
        hard-wired to tanh.
    :param output_activation: 'linear' or 'sigmoid'.
    :param use_rprop: stored flag for the Rprop optimiser.
    :param rprop_init_update: initial Rprop step size.
    """
    logger = logging.getLogger("DNN initialization")

    self.sigmoid_layers = []
    self.params = []
    self.delta_params = []
    self.n_layers = len(hidden_layers_sizes)

    self.output_activation = output_activation
    self.use_rprop = use_rprop
    self.rprop_init_update = rprop_init_update

    self.l1_reg = l1_reg
    self.l2_reg = l2_reg

    assert self.n_layers > 0

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.matrix('y')

    for i in range(self.n_layers):
        if i == 0:
            input_size = n_ins
        else:
            input_size = hidden_layers_sizes[i - 1]

        if i == 0:
            layer_input = self.x
        else:
            layer_input = self.sigmoid_layers[-1].output

        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=layer_input,
                                    n_in=input_size,
                                    n_out=hidden_layers_sizes[i],
                                    activation=T.tanh)

        self.sigmoid_layers.append(sigmoid_layer)
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    # add final layer
    if self.output_activation == 'linear':
        self.final_layer = LinearLayer(rng=numpy_rng,
                                       input=self.sigmoid_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    elif self.output_activation == 'sigmoid':
        self.final_layer = SigmoidLayer(rng=numpy_rng,
                                        input=self.sigmoid_layers[-1].output,
                                        n_in=hidden_layers_sizes[-1],
                                        n_out=n_outs,
                                        activation=T.nnet.sigmoid)
    else:
        logger.critical("This output activation function: %s is not supported right now!" % (self.output_activation))
        sys.exit(1)

    self.params.extend(self.final_layer.params)
    self.delta_params.extend(self.final_layer.delta_params)

    ### MSE: mean (over frames) of the summed squared error per frame.
    self.finetune_cost = T.mean(T.sum(
        (self.final_layer.output - self.y) * (self.final_layer.output - self.y),
        axis=1))
    self.errors = T.mean(T.sum(
        (self.final_layer.output - self.y) * (self.final_layer.output - self.y),
        axis=1))

    ### L1-norm penalty on each hidden layer's weight matrix.
    # Relies on params being laid out as [W, b] per layer.
    if self.l1_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l1_reg * (abs(W).sum())

    ### L2-norm penalty on each hidden layer's weight matrix.
    if self.l2_reg is not None:
        for i in range(self.n_layers):
            W = self.params[i * 2]
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()