Example #1
File: dnn.py  Project: synetkim/multi_asr
class DNN(object):
    def __init__(
            self,
            numpy_rng,
            theano_rng=None,
            cfg=None,  # the network configuration
            dnn_shared=None,
            shared_layers=[],
            input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None
            b = None
            if (i in shared_layers):
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b
            if self.do_maxout == True:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i] *
                                           self.pool_size,
                                           W=W,
                                           b=b,
                                           activation=(lambda x: 1.0 * x),
                                           do_maxout=True,
                                           pool_size=self.pool_size)
            else:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i],
                                           W=W,
                                           b=b,
                                           activation=self.activation)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            # if the layer index is included in self.non_updated_layers, parameters of this layer will not be updated
            if (i not in self.non_updated_layers):
                self.params.extend(hidden_layer.params)
                self.delta_params.extend(hidden_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=self.hidden_layers_sizes[-1],
                                           n_out=self.n_outs)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy,
                                 batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms /
                                              (1e-7 + col_norms))

        train_fn = theano.function(
            inputs=[
                index,
                theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)
            ],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        return train_fn, valid_fn

    def build_extract_feat_function(self, output_layer):

        feat = T.matrix('feat')
        out_da = theano.function([feat],
                                 self.layers[output_layer].output,
                                 updates=None,
                                 givens={self.x: feat},
                                 on_unused_input='warn')
        return out_da

    def build_finetune_functions_kaldi(self, train_shared_xy, valid_shared_xy):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms /
                                              (1e-7 + col_norms))

        train_fn = theano.function(inputs=[
            theano.Param(learning_rate, default=0.0001),
            theano.Param(momentum, default=0.5)
        ],
                                   outputs=self.errors,
                                   updates=updates,
                                   givens={
                                       self.x: train_set_x,
                                       self.y: train_set_y
                                   })

        valid_fn = theano.function(inputs=[],
                                   outputs=self.errors,
                                   givens={
                                       self.x: valid_set_x,
                                       self.y: valid_set_y
                                   })

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path)

    def write_model_to_kaldi(self, file_path, with_softmax=True):
        # determine whether it's BNF based on layer sizes
        output_layer_number = -1
        for layer_index in range(1, self.hidden_layers_number - 1):
            cur_layer_size = self.hidden_layers_sizes[layer_index]
            prev_layer_size = self.hidden_layers_sizes[layer_index - 1]
            next_layer_size = self.hidden_layers_sizes[layer_index + 1]
            if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
                output_layer_number = layer_index + 1
                break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = open(file_path, 'wb')
        for i in xrange(output_layer_number):
            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number - 1) and with_softmax:
                # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []
            b_layer = ''
            for rowX in xrange(output_size):
                W_layer.append('')

            for x in xrange(input_size):
                for t in xrange(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '

            for x in xrange(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '

            fout.write('<affinetransform> ' + str(output_size) + ' ' +
                       str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in xrange(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' +
                           str(output_size / self.pool_size) + ' ' +
                           str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' +
                           str(output_size) + '\n')
        fout.close()
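A minimal usage sketch of the DNN class above, assuming a PDNN-style config object and shared Theano datasets; cfg, train_set_x/train_set_y, valid_set_x/valid_set_y and the batch counts are hypothetical names, not defined in this snippet:

import numpy

numpy_rng = numpy.random.RandomState(123)
dnn = DNN(numpy_rng=numpy_rng, cfg=cfg)  # cfg: hypothetical NetworkConfig with the attributes read in __init__

train_fn, valid_fn = dnn.build_finetune_functions(
    (train_set_x, train_set_y),          # shared Theano variables (hypothetical)
    (valid_set_x, valid_set_y),
    batch_size=256)

for epoch in xrange(10):
    for b in xrange(n_train_batches):    # hypothetical batch count
        train_fn(b, learning_rate=0.08, momentum=0.5)
    valid_errors = [valid_fn(b) for b in xrange(n_valid_batches)]
    print 'epoch %d, valid error %f' % (epoch, numpy.mean(valid_errors))

dnn.write_model_to_kaldi('final.nnet')   # export weights in Kaldi nnet1 text format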
Example #2
    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,  # the network configuration
                 dnn_shared = None, shared_layers=[], input = None):

        self.cfg = cfg
        self.params = []
        self.delta_params = []
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.max_col_norm = cfg.max_col_norm
        print self.max_col_norm

        self.layers = []
        self.lstm_layers = []
        self.fc_layers = []

        # 1. lstm
        self.lstm_layers_sizes = cfg.lstm_layers_sizes
        self.lstm_layers_number = len(self.lstm_layers_sizes)
        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation


        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input 
        self.y = T.ivector('y')

        #######################
        # build lstm layers   #
        #######################
        print '1. start to build lstm layer: '+ str(self.lstm_layers_number)
        for i in xrange(self.lstm_layers_number):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.lstm_layers_sizes[i - 1]
                input = self.layers[-1].output
            lstm_layer = LSTMLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.lstm_layers_sizes[i])
            print '\tbuild lstm layer: ' + str(input_size) +' x '+ str(lstm_layer.n_out)
            self.layers.append(lstm_layer)
            self.lstm_layers.append(lstm_layer)
            self.params.extend(lstm_layer.params)
            self.delta_params.extend(lstm_layer.delta_params)
        print '1. finish lstm layer: '+ str(self.layers[-1].n_out)

        #######################
        # build dnnv layers   #
        #######################
        #print '2. start to build dnnv layer: '+ str(self.hidden_layers_number)
        #for i in xrange(self.hidden_layers_number):
        #    if i == 0:
        #        input_size = self.layers[-1].n_out
        #    else:
        #        input_size = self.hidden_layers_sizes[i - 1]
        #    input = self.layers[-1].output
        #    fc_layer = HiddenLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.hidden_layers_sizes[i], activation=self.activation)
        #    print '\tbuild dnnv layer: ' + str(input_size) +' x '+ str(fc_layer.n_out)
        #    self.layers.append(fc_layer)
        #    self.fc_layers.append(fc_layer)
        #    self.params.extend(fc_layer.params)
        #    self.delta_params.extend(fc_layer.delta_params)
        #print '2. finish dnnv layer: '+ str(self.layers[-1].n_out)

        #######################
        # build log layers   #
        #######################
        print '3. start to build log layer: 1'
        input_size = self.layers[-1].n_out
        input = self.layers[-1].output
        logLayer = LogisticRegression(input=input, n_in=input_size, n_out=self.n_outs)
        print '\tbuild final layer: ' + str(input_size) +' x '+ str(self.n_outs)
        self.layers.append(logLayer)
        self.params.extend(logLayer.params)
        self.delta_params.extend(logLayer.delta_params)
        print '3. finish log layer: '+ str(self.layers[-1].n_out)
        print 'Total layers: '+ str(len(self.layers))

        sys.stdout.flush()

        self.finetune_cost = logLayer.negative_log_likelihood(self.y)
        self.errors = logLayer.errors(self.y)
Example #3
File: cnn_sat.py  Project: xczhanjun/pdnn
class CNN_SAT(object):
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 batch_size=256,
                 n_outs=500,
                 conv_layer_configs=[],
                 hidden_layers_sizes=[500, 500],
                 ivec_layers_sizes=[500, 500],
                 conv_activation=T.nnet.sigmoid,
                 full_activation=T.nnet.sigmoid,
                 use_fast=False,
                 update_part=[0, 1],
                 ivec_dim=100):

        self.conv_layers = []
        self.full_layers = []
        self.ivec_layers = []

        self.params = []
        self.delta_params = []

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        input_shape = conv_layer_configs[0]['input_shape']
        n_ins = input_shape[-1] * input_shape[-2] * input_shape[-3]

        self.iv = self.x[:, n_ins:n_ins + ivec_dim]
        self.raw = self.x[:, 0:n_ins]

        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        # construct the adaptation NN
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(sigmoid_layer)
            if 0 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)

        linear_func = lambda x: x
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=self.ivec_layers[-1].output,
                                    n_in=ivec_layers_sizes[-1],
                                    n_out=n_ins,
                                    activation=linear_func)
        self.ivec_layers.append(sigmoid_layer)
        if 0 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.raw + self.ivec_layers[-1].output
            else:
                input = self.conv_layers[-1].output
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng,
                                   input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=conv_activation,
                                   flatten=config['flatten'],
                                   use_fast=use_fast)
            self.conv_layers.append(conv_layer)
            if 1 in update_part:
                self.params.extend(conv_layer.params)
                self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config[
            'output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
                layer_input = self.conv_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.full_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.full_layers.append(sigmoid_layer)
            if 1 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.full_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.full_layers.append(self.logLayer)
        if 1 in update_part:
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy,
                                 batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()

        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(
            inputs=[
                index,
                theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)
            ],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        return train_fn, valid_fn
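The SAT examples above assume that each row of self.x is a frame's raw feature vector with the utterance i-vector appended, and split it by column slicing (self.raw, self.iv); the adaptation sub-network then maps the i-vector to an n_ins-dimensional shift added to the raw features. A small self-contained numpy sketch of that column layout (all sizes are illustrative):

import numpy

n_ins, ivec_dim, batch = 360, 100, 4                          # illustrative sizes
raw = numpy.random.randn(batch, n_ins)                        # frame features
ivec = numpy.random.randn(1, ivec_dim).repeat(batch, axis=0)  # same i-vector for each frame
x = numpy.hstack([raw, ivec])                                 # what would be fed as self.x

# mirrors self.raw = self.x[:, 0:n_ins] and self.iv = self.x[:, n_ins:n_ins + ivec_dim]
assert numpy.allclose(x[:, 0:n_ins], raw)
assert numpy.allclose(x[:, n_ins:n_ins + ivec_dim], ivec)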
Example #4
class CNN_SAT(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 batch_size = 256, n_outs=500,
                 conv_layer_configs = [],
                 hidden_layers_sizes=[500, 500],
                 ivec_layers_sizes=[500, 500],
                 conv_activation = T.nnet.sigmoid,
                 full_activation = T.nnet.sigmoid,
                 use_fast = False,
                 update_part = [0, 1],
                 ivec_dim = 100):

        self.conv_layers = []
        self.full_layers = []
        self.ivec_layers = [] 
        
        self.params = []
        self.delta_params   = []

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  
        self.y = T.ivector('y') 
       
        input_shape = conv_layer_configs[0]['input_shape']
        n_ins = input_shape[-1] * input_shape[-2] * input_shape[-3]

        self.iv = self.x[:,n_ins:n_ins+ivec_dim]
        self.raw = self.x[:,0:n_ins]
 
        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        # construct the adaptation NN
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(sigmoid_layer)
            if 0 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)

        linear_func = lambda x: x
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=self.ivec_layers[-1].output,
                                    n_in=ivec_layers_sizes[-1],
                                    n_out=n_ins,
                                    activation=linear_func)
        self.ivec_layers.append(sigmoid_layer)
        if 0 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)


        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.raw + self.ivec_layers[-1].output 
            else:
                input = self.conv_layers[-1].output
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=conv_activation,
                                   flatten=config['flatten'],
                                   use_fast=use_fast)
            self.conv_layers.append(conv_layer)
            if 1 in update_part:
                self.params.extend(conv_layer.params)
                self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
                layer_input = self.conv_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.full_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.full_layers.append(sigmoid_layer)
            if 1 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.full_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.full_layers.append(self.logLayer)
        if 1 in update_part:
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}

        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
Example #5
    def __init__(self, numpy_rng, theano_rng=None,
                 batch_size = 256, n_outs=500,
                 sparsity=None, sparsity_weight=None, sparse_layer=3,
                 conv_layer_configs = [],
                 hidden_layers_sizes=[500, 500],
                 conv_activation = T.nnet.sigmoid,
                 full_activation = T.nnet.sigmoid,
                 use_fast = False):

        self.layers = []
        self.params = []
        self.delta_params   = []

        self.sparsity = sparsity
        self.sparsity_weight = sparsity_weight
        self.sparse_layer = sparse_layer

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  
        self.y = T.ivector('y') 
        
        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)

        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.x
                is_input_layer = True
            else:
                input = self.layers[-1].output
                is_input_layer = False
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                                   is_input_layer=is_input_layer,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=conv_activation,
                                   flatten=config['flatten'])
            self.layers.append(conv_layer)
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
            else:
                input_size = hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        if self.sparsity_weight is not None:
            sparsity_level = T.extra_ops.repeat(self.sparsity, 630)
            # this snippet stores its layers in self.layers (no self.sigmoid_layers is defined)
            avg_act = self.layers[sparse_layer].output.mean(axis=0)
            kl_div = self.kl_divergence(sparsity_level, avg_act)
            self.finetune_cost = (self.logLayer.negative_log_likelihood(self.y) +
                                  self.sparsity_weight * kl_div.sum())
        else:
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)
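The sparsity branch above penalizes the mean activation of one hidden layer with a KL divergence against a target level, adding it (scaled by sparsity_weight) to the negative log-likelihood. A self-contained numpy sketch of that penalty; the target value is illustrative, and the 630-unit width mirrors the repeat in the code above:

import numpy

def kl_divergence(p, p_hat):
    # same form as the class method: KL between Bernoulli(p) and Bernoulli(p_hat)
    return p * numpy.log(p / p_hat) + (1 - p) * numpy.log((1 - p) / (1 - p_hat))

target = 0.05                                                 # illustrative sparsity target
avg_act = numpy.clip(numpy.random.rand(630), 1e-6, 1 - 1e-6)  # stand-in for the layer's mean activation
penalty = kl_divergence(target, avg_act).sum()                # added to the NLL, scaled by sparsity_weight
print penalty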
Example #6
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid,
                 do_maxout = False, pool_size = 1, 
                 do_pnorm = False, pnorm_order = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None,
                 ivec_layers_sizes=[500, 500], ivec_dim = 100):

        self.sigmoid_layers = []
        self.ivec_layers = []

        self.sigmoid_params = []    # params and delta_params for the DNN parameters; the sigmoid prefix is a bit confusing
        self.sigmoid_delta_params = []
        self.ivec_params = []       # params and delta_params for the iVecNN parameters
        self.ivec_delta_params = []
        self.params = []            # the params to be updated in the current training
        self.delta_params   = []

        self.n_layers = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
        
        # we assume that i-vectors are appended to speech features in a frame-wise manner
        self.iv = self.x[:,n_ins:n_ins+ivec_dim]
        self.raw = self.x[:,0:n_ins]

        # construct the iVecNN which generates linear feature shifts
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            ivec_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(ivec_layer)
            self.ivec_params.extend(ivec_layer.params)
            self.ivec_delta_params.extend(ivec_layer.delta_params)

        # the final output layer which has the same dimension as the input features
        linear_func = lambda x: x
        ivec_layer = HiddenLayer(rng=numpy_rng,
                                 input=self.ivec_layers[-1].output,
                                 n_in=ivec_layers_sizes[-1],
                                 n_out=n_ins,
                                 activation=linear_func)
        self.ivec_layers.append(ivec_layer)
        self.ivec_params.extend(ivec_layer.params)
        self.ivec_delta_params.extend(ivec_layer.delta_params)

        # construct the DNN (canonical model)
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
                layer_input = self.raw + self.ivec_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            if do_maxout == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_maxout = True, pool_size = pool_size)
            elif do_pnorm == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_pnorm = True, pool_size = pool_size, pnorm_order = pnorm_order)
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.sigmoid_params.extend(sigmoid_layer.params)
            self.sigmoid_delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.sigmoid_layers.append(self.logLayer)
        self.sigmoid_params.extend(self.logLayer.params)
        self.sigmoid_delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetunining
        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
Example #7
    def __init__(
            self,
            numpy_rng,
            theano_rng=None,
            cfg=None,  # the network configuration
            dnn_shared=None,
            shared_layers=[],
            input=None,
            draw=None):

        self.cfg = cfg
        self.params = []
        self.delta_params = []
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.max_col_norm = 1
        print self.max_col_norm

        self.layers = []
        self.lstm_layers = []
        self.fc_layers = []

        # 1. lstm
        self.lstm_layers_sizes = cfg.lstm_layers_sizes
        self.lstm_layers_number = len(self.lstm_layers_sizes)
        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.ivector('y')

        #######################
        # build lstm layers   #
        #######################
        print '1. start to build AttendLSTMLayer : ' + str(
            self.lstm_layers_number) + ', n_attendout: ' + str(cfg.batch_size)
        for i in xrange(1):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.lstm_layers_sizes[i - 1]
                input = self.layers[-1].output
            lstm_layer = AttendLSTMLayer(rng=numpy_rng,
                                         input=input,
                                         n_in=input_size,
                                         n_out=self.lstm_layers_sizes[i],
                                         steps=cfg.batch_size,
                                         draw=draw)
            print '\tbuild AttendLSTMLayer: ' + str(input_size) + ' x ' + str(
                lstm_layer.n_out)
            self.layers.append(lstm_layer)
            self.lstm_layers.append(lstm_layer)
            self.params.extend(lstm_layer.params)
            self.delta_params.extend(lstm_layer.delta_params)
        print '1. finish AttendLSTMLayer: ' + str(self.layers[-1].n_out)

        print '2. start to build LSTMLayer : ' + str(self.lstm_layers_number)
        for i in xrange(1, self.lstm_layers_number, 1):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.lstm_layers_sizes[i - 1]
                input = self.layers[-1].output
            lstm_layer = LSTMLayer(rng=numpy_rng,
                                   input=input,
                                   n_in=input_size,
                                   n_out=self.lstm_layers_sizes[i])
            print '\tbuild LSTMLayer: ' + str(input_size) + ' x ' + str(
                lstm_layer.n_out)
            self.layers.append(lstm_layer)
            self.lstm_layers.append(lstm_layer)
            self.params.extend(lstm_layer.params)
            self.delta_params.extend(lstm_layer.delta_params)
        print '2. finish LSTMLayer: ' + str(self.layers[-1].n_out)

        #######################
        # build dnnv layers   #
        #######################
        #print '2. start to build dnnv layer: '+ str(self.hidden_layers_number)
        #for i in xrange(self.hidden_layers_number):
        #    if i == 0:
        #        input_size = self.layers[-1].n_out
        #    else:
        #        input_size = self.hidden_layers_sizes[i - 1]
        #    input = self.layers[-1].output
        #    fc_layer = HiddenLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.hidden_layers_sizes[i], activation=self.activation)
        #    print '\tbuild dnnv layer: ' + str(input_size) +' x '+ str(fc_layer.n_out)
        #    self.layers.append(fc_layer)
        #    self.fc_layers.append(fc_layer)
        #    self.params.extend(fc_layer.params)
        #    self.delta_params.extend(fc_layer.delta_params)
        #print '2. finish dnnv layer: '+ str(self.layers[-1].n_out)

        #######################
        # build log layers   #
        #######################
        print '3. start to build log layer: 1'
        input_size = self.layers[-1].n_out
        input = self.layers[-1].output
        logLayer = LogisticRegression(input=input,
                                      n_in=input_size,
                                      n_out=self.n_outs)
        print '\tbuild final layer: ' + str(input_size) + ' x ' + str(
            self.n_outs)
        self.layers.append(logLayer)
        self.params.extend(logLayer.params)
        self.delta_params.extend(logLayer.delta_params)
        print '3. finish log layer: ' + str(self.layers[-1].n_out)
        print 'Total layers: ' + str(len(self.layers))

        sys.stdout.flush()

        self.finetune_cost = logLayer.negative_log_likelihood(self.y)
        self.errors = logLayer.errors(self.y)
Example #8
File: sda.py  Project: ducle90/chai_share
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 n_ins=784,
                 hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 corruption_levels=[0.1, 0.1],
                 pool_size=3,
                 sparsity=None,
                 sparsity_weight=None,
                 first_reconstruct_activation=T.tanh):

        self.sigmoid_layers = []
        self.dA_layers = []
        self.params = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.sigmoid_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] *
                                        pool_size,
                                        activation=(lambda x: 1.0 * x),
                                        do_maxout=True,
                                        pool_size=pool_size)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)

            self.params.extend(sigmoid_layer.params)

            # Construct a denoising autoencoder that shared weights with this layer
            if i == 0:
                reconstruct_activation = first_reconstruct_activation
            else:
                reconstruct_activation = (lambda x: 1.0 * x)
#               reconstruct_activation = first_reconstruct_activation
            dA_layer = dA_maxout(numpy_rng=numpy_rng,
                                 theano_rng=theano_rng,
                                 input=layer_input,
                                 n_visible=input_size,
                                 n_hidden=hidden_layers_sizes[i] * pool_size,
                                 W=sigmoid_layer.W,
                                 bhid=sigmoid_layer.b,
                                 sparsity=sparsity,
                                 sparsity_weight=sparsity_weight,
                                 pool_size=pool_size,
                                 reconstruct_activation=reconstruct_activation)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.sigmoid_layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)

        self.sigmoid_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
Example #9
class DNN_SAT(object):

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid,
                 do_maxout = False, pool_size = 1, 
                 do_pnorm = False, pnorm_order = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None,
                 ivec_layers_sizes=[500, 500], ivec_dim = 100):

        self.sigmoid_layers = []
        self.ivec_layers = []

        self.sigmoid_params = []    # params and delta_params for the DNN parameters; the sigmoid prefix is a bit confusing
        self.sigmoid_delta_params = []
        self.ivec_params = []       # params and delta_params for the iVecNN parameters
        self.ivec_delta_params = []
        self.params = []            # the params to be updated in the current training
        self.delta_params   = []

        self.n_layers = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
        
        # we assume that i-vectors are appended to speech features in a frame-wise manner
        self.iv = self.x[:,n_ins:n_ins+ivec_dim]
        self.raw = self.x[:,0:n_ins]

        # construct the iVecNN which generates linear feature shifts
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            ivec_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(ivec_layer)
            self.ivec_params.extend(ivec_layer.params)
            self.ivec_delta_params.extend(ivec_layer.delta_params)

        # the final output layer which has the same dimension as the input features
        linear_func = lambda x: x
        ivec_layer = HiddenLayer(rng=numpy_rng,
                                 input=self.ivec_layers[-1].output,
                                 n_in=ivec_layers_sizes[-1],
                                 n_out=n_ins,
                                 activation=linear_func)
        self.ivec_layers.append(ivec_layer)
        self.ivec_params.extend(ivec_layer.params)
        self.ivec_delta_params.extend(ivec_layer.delta_params)

        # construct the DNN (canonical model)
        for i in xrange(self.n_layers):
            if i == 0:
                input_size = n_ins
                layer_input = self.raw + self.ivec_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            if do_maxout == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_maxout = True, pool_size = pool_size)
            elif do_pnorm == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_pnorm = True, pool_size = pool_size, pnorm_order = pnorm_order)
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.sigmoid_params.extend(sigmoid_layer.params)
            self.sigmoid_delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.sigmoid_layers.append(self.logLayer)
        self.sigmoid_params.extend(self.logLayer.params)
        self.sigmoid_delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetunining
        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
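The two compiled Theano functions returned above are meant to be called once per minibatch. A minimal driving loop might look like the sketch below; the names `dnn`, `train_xy`, `valid_xy`, `n_train_batches` and `n_valid_batches` are illustrative assumptions, not part of the excerpt.

# Minimal usage sketch (assumed names: dnn, train_xy, valid_xy, n_train_batches,
# n_valid_batches are illustrative and not defined in the excerpt above).
train_fn, valid_fn = dnn.build_finetune_functions(train_xy, valid_xy, batch_size=256)

for epoch in range(10):
    # one momentum-SGD step per training minibatch; each call returns the minibatch error
    train_err = [train_fn(b, 0.08, 0.5) for b in range(n_train_batches)]
    # validation only reads the parameters, no updates are applied
    valid_err = [valid_fn(b) for b in range(n_valid_batches)]
    print('epoch %d: train %.4f  valid %.4f' %
          (epoch, sum(train_err) / len(train_err), sum(valid_err) / len(valid_err)))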
Example #11
0
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 cfg=None,
                 dnn_shared=None,
                 shared_layers=[]):

        self.layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size
        self.input_dropout_factor = cfg.input_dropout_factor
        self.dropout_factor = cfg.dropout_factor

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
                if self.input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(
                        theano_rng, self.x, self.input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = (
                    1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            W = None
            b = None
            if (i in shared_layers):
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b

            if self.do_maxout == False:
                dropout_layer = DropoutHiddenLayer(
                    rng=numpy_rng,
                    input=dropout_layer_input,
                    n_in=input_size,
                    n_out=self.hidden_layers_sizes[i],
                    W=W,
                    b=b,
                    activation=self.activation,
                    dropout_factor=self.dropout_factor[i])
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i],
                                           activation=self.activation,
                                           W=dropout_layer.W,
                                           b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(
                    rng=numpy_rng,
                    input=dropout_layer_input,
                    n_in=input_size,
                    n_out=self.hidden_layers_sizes[i] * self.pool_size,
                    W=W,
                    b=b,
                    activation=(lambda x: 1.0 * x),
                    dropout_factor=self.dropout_factor[i],
                    do_maxout=True,
                    pool_size=self.pool_size)
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i] *
                                           self.pool_size,
                                           activation=(lambda x: 1.0 * x),
                                           W=dropout_layer.W,
                                           b=dropout_layer.b,
                                           do_maxout=True,
                                           pool_size=self.pool_size)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
            input=self.dropout_layers[-1].dropout_output,
            n_in=self.hidden_layers_sizes[-1],
            n_out=self.n_outs)

        self.logLayer = LogisticRegression(
            input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
            n_in=self.hidden_layers_sizes[-1],
            n_out=self.n_outs,
            W=self.dropout_logLayer.W,
            b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(
            self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
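The dropout constructor above calls a helper `_dropout_from_layer` that is not part of this excerpt. As a rough sketch of what such a helper typically does (an assumption about its behaviour; the helper in the source project may differ), it samples a binary keep-mask from the Theano random stream and applies it element-wise:

import theano

def _dropout_from_layer(theano_rng, layer_output, p):
    # assumed behaviour: drop each unit independently with probability p (no rescaling)
    mask = theano_rng.binomial(n=1, p=1 - p, size=layer_output.shape,
                               dtype=theano.config.floatX)
    return layer_output * mask

Because the mask is not rescaled here, the clean layers built in parallel compensate at test time, which is why the constructor above multiplies the non-dropout layer inputs by (1 - self.dropout_factor[i - 1]).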
Example #12
0
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1], activation=T.nnet.sigmoid):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """
        super(SDA, self).__init__()
        
        self.layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          activation=T.nnet.sigmoid)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out
Example #13
0
class SDA(nnet):
    """Stacked denoising auto-encoder class (SdA)

    A stacked denoising autoencoder model is obtained by stacking several
    dAs. The hidden layer of the dA at layer `i` becomes the input of
    the dA at layer `i+1`. The first layer dA gets as input the input of
    the SdA, and the hidden layer of the last dA represents the output.
    Note that after pretraining, the SdA is dealt with as a normal MLP,
    the dAs are only used to initialize the weights.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 corruption_levels=[0.1, 0.1], activation=T.nnet.sigmoid):
        """ This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the sdA

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layer sizes, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type corruption_levels: list of float
        :param corruption_levels: amount of corruption to use for each
                                  layer
        """
        super(SDA, self).__init__()
        
        self.layers = []
        self.dA_layers = []
        self.n_layers = len(hidden_layers_sizes)

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector of
                                 # [int] labels

        # The SdA is an MLP, for which all weights of intermediate layers
        # are shared with a different denoising autoencoder.
        # We will first construct the SdA as a deep multilayer perceptron,
        # and when constructing each sigmoidal layer we also construct a
        # denoising autoencoder that shares weights with that layer.
        # During pretraining we will train these autoencoders (which will
        # lead to changing the weights of the MLP as well).
        # During finetuning we will finish training the SdA by doing
        # stochastic gradient descent on the MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden units of
            # the layer below or the input size if we are on the first layer
            if i == 0:
                input_size = n_ins
            else:
                input_size = hidden_layers_sizes[i - 1]

            # the input to this layer is either the activation of the hidden
            # layer below or the input of the SdA if you are on the first
            # layer
            if i == 0:
                layer_input = self.x
            else:
                layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            # it's arguably a philosophical question...
            # but we are going to only declare that the parameters of the
            # sigmoid_layers are parameters of the StackedDAA
            # the visible biases in the dA are parameters of those
            # dA, but not the SdA
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct a denoising autoencoder that shares weights with this
            # layer
            dA_layer = dA(numpy_rng=numpy_rng,
                          theano_rng=theano_rng,
                          input=layer_input,
                          n_visible=input_size,
                          n_hidden=hidden_layers_sizes[i],
                          W=sigmoid_layer.W,
                          bhid=sigmoid_layer.b,
                          activation=T.nnet.sigmoid)
            self.dA_layers.append(dA_layer)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out

    def pretraining_functions(self, train_x, batch_size):
        ''' Generates a list of functions, each of them implementing one
        step in training the dA corresponding to the layer with the same index.
        The function will require as input the minibatch index, and to train
        a dA you just need to iterate, calling the corresponding function on
        all minibatch indexes.

        :type train_x: theano.tensor.TensorType
        :param train_x: Shared variable that contains all datapoints used
                            for training the dA

        :type batch_size: int
        :param batch_size: size of a [mini]batch
        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        corruption_level = T.scalar('corruption')  # % of corruption to use
        learning_rate = T.scalar('lr')  # learning rate to use
        # number of batches
        n_batches = train_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for dA in self.dA_layers:
            # get the cost and the updates list
            cost, updates = dA.get_cost_updates(corruption_level,
                                                learning_rate)
            # compile the theano function
            fn = theano.function(inputs=[index,
                              theano.Param(corruption_level, default=0.2),
                              theano.Param(learning_rate, default=0.1)],
                                 outputs=cost,
                                 updates=updates,
                                 givens={self.x: train_x[batch_begin:
                                                             batch_end]})
            # append `fn` to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
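The list of compiled functions returned by `pretraining_functions` is usually driven layer by layer, as in the sketch below; `sda`, `train_x`, `n_batches` and `corruption_levels` are illustrative assumptions, not part of the excerpt.

# Layer-wise pretraining sketch (assumed names: sda, train_x, n_batches and
# corruption_levels are illustrative; not defined in the excerpt above).
pretrain_fns = sda.pretraining_functions(train_x=train_x, batch_size=128)

for layer_idx, pretrain_fn in enumerate(pretrain_fns):
    for epoch in range(15):
        # each call does one denoising-autoencoder update on one minibatch
        costs = [pretrain_fn(batch_idx,
                             corruption=corruption_levels[layer_idx],
                             lr=0.01)
                 for batch_idx in range(n_batches)]
        print('layer %d epoch %d: mean cost %.4f' %
              (layer_idx, epoch, sum(costs) / len(costs)))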
Example #14
0
File: drn.py Project: Beronx86/pdnn
class DNN(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,  # the network configuration
                 dnn_shared = None, shared_layers=[], input = None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.rnn_layerX = 2
        print "Use DRN"

        self.cfg = cfg
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        if input == None:
            self.x = T.matrix('x')
        else:
            self.x = input 
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None; b = None
            if (i in shared_layers) :
                W = dnn_shared.layers[i].W; b = dnn_shared.layers[i].b
            if i == self.rnn_layerX:
                hidden_layer = RnnLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation=self.activation) 
            else:
                if self.do_maxout == True:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        W = W, b = b,
                                        activation = (lambda x: 1.0*x),
                                        do_maxout = True, pool_size = self.pool_size)
                else:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation=self.activation)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)
       
        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()


    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn


    def build_extract_feat_function(self, output_layer):

        feat = T.matrix('feat')
        out_da = theano.function([feat], self.layers[output_layer].output, updates = None, givens={self.x:feat}, on_unused_input='warn')
        return out_da

    def build_finetune_functions_kaldi(self, train_shared_xy, valid_shared_xy):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={self.x: train_set_x, self.y: train_set_y})

        valid_fn = theano.function(inputs=[],
              outputs=self.errors,
              givens={self.x: valid_set_x, self.y: valid_set_y})

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path)

    def write_model_to_kaldi(self, file_path, with_softmax = True):
        # determine whether it's BNF based on layer sizes
        output_layer_number = -1;
        for layer_index in range(1, self.hidden_layers_number - 1):
            cur_layer_size = self.hidden_layers_sizes[layer_index]
            prev_layer_size = self.hidden_layers_sizes[layer_index-1]
            next_layer_size = self.hidden_layers_sizes[layer_index+1]
            if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
                output_layer_number = layer_index+1; break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = open(file_path, 'wb')
        for i in xrange(output_layer_number):
            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number-1) and with_softmax:   # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []; b_layer = ''
            for rowX in xrange(output_size):
                W_layer.append('')

            for x in xrange(input_size):
                for t in xrange(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '

            for x in xrange(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '

            fout.write('<affinetransform> ' + str(output_size) + ' ' + str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in xrange(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' + str(output_size/self.pool_size) + ' ' + str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' + str(output_size) + '\n')
        fout.close()
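For orientation, a short usage sketch of the two export-style helpers defined above; `dnn` and `feats` are illustrative assumptions (a trained instance of this class and a 2-D numpy array of input frames), and the output file name is arbitrary.

import numpy
import theano

# compile a feed-forward function that stops at layer 3 (e.g. a bottleneck layer)
extract = dnn.build_extract_feat_function(3)
bnf = extract(numpy.asarray(feats, dtype=theano.config.floatX))

# write the whole stack out in the Kaldi-style text format produced above
dnn.write_model_to_kaldi('final.nnet', with_softmax=True)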
Example #15
0
    def __init__(
            self,
            task_id,
            numpy_rng,
            theano_rng=None,
            cfg=None,  # the network configuration
            dnn_shared=None,
            shared_layers=[],
            input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        if input == None:
            self.x = T.matrix('x')
        else:
            self.x = input
        if task_id == 0:
            self.y = T.ivector('y')
        else:
            self.y = T.matrix('y')

        #######################
        # build dnnv layers   #
        #######################
        print "=============="
        print "Task ID: %d" % (task_id)
        print "=============="
        print '1. start to build dnn layer: ' + str(self.hidden_layers_number)
        for i in xrange(self.hidden_layers_number):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                input = self.layers[-1].output
            W = None
            b = None
            if (i in shared_layers):
                print "shared layer = %d" % (i)
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b

            hidden_layer = HiddenLayer(rng=numpy_rng,
                                       input=input,
                                       n_in=input_size,
                                       n_out=self.hidden_layers_sizes[i],
                                       W=W,
                                       b=b,
                                       activation=self.activation)
            print '\tbuild hidden layer: ' + str(input_size) + ' x ' + str(
                hidden_layer.n_out)
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        print '1. finish dnnv layer: ' + str(self.layers[-1].n_out)

        #######################
        # build log layers   #
        #######################
        print '2. start to build final layer: 1'
        input_size = self.layers[-1].n_out
        input = self.layers[-1].output
        if task_id == 0:
            self.logLayer = LogisticRegression(
                input=self.layers[-1].output,
                n_in=self.hidden_layers_sizes[-1],
                n_out=self.n_outs)
            print '\tbuild final layer (classification): ' + str(
                input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
            self.errors = self.logLayer.errors(self.y)
        else:
            self.logLayer = OutputLayer(input=input,
                                        n_in=input_size,
                                        n_out=self.n_outs)
            print '\tbuild final layer (regression): ' + str(
                input_size) + ' x ' + str(self.logLayer.n_out)
            self.finetune_cost = self.logLayer.l2(self.y)
            self.errors = self.logLayer.errors(self.y)

        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
        print '2. finish log layer: ' + str(self.layers[-1].n_out)
        print 'Total layers: ' + str(len(self.layers))

        sys.stdout.flush()

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
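The `task_id` flag above selects between a classification head (integer labels with a negative-log-likelihood cost) and a regression head (real-valued targets with an L2 cost), while `dnn_shared`/`shared_layers` let several task networks reuse the same hidden layers. Below is a sketch of wiring two such networks together; `TaskDNN` stands in for the enclosing class (whose name is not shown in the excerpt), and `cfg0`/`cfg1` are assumed, pre-built network configurations.

import numpy

# illustrative names: TaskDNN is a placeholder for the class above,
# cfg0/cfg1 are configurations with identical hidden_layers_sizes
rng = numpy.random.RandomState(123)

# task 0: classification head
net_cls = TaskDNN(task_id=0, numpy_rng=rng, cfg=cfg0)

# task 1: regression head that reuses every hidden layer of the first network
shared = list(range(len(cfg1.hidden_layers_sizes)))
net_reg = TaskDNN(task_id=1, numpy_rng=rng, cfg=cfg1,
                  dnn_shared=net_cls, shared_layers=shared)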
Example #16
0
class DNN_Dropout(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,
                 dnn_shared = None, shared_layers=[]):

        self.layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params   = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size
        self.input_dropout_factor = cfg.input_dropout_factor; self.dropout_factor = cfg.dropout_factor

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in range(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
                if self.input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, self.input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            W = None; b = None
            if (i in shared_layers) :
                W = dnn_shared.layers[i].W; b = dnn_shared.layers[i].b

            if self.do_maxout == False:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation= self.activation,
                                        dropout_factor=self.dropout_factor[i])
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        activation= self.activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        W = W, b = b,
                                        activation= (lambda x: 1.0*x),
                                        dropout_factor=self.dropout_factor[i],
                                        do_maxout = True, pool_size = self.pool_size)
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        activation= (lambda x: 1.0*x),
                                        W=dropout_layer.W, b=dropout_layer.b,
                                        do_maxout = True, pool_size = self.pool_size)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in range(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in range(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        updates = self.cfg.lrate.getOptimizerUpdates(self.finetune_cost, self.delta_params, self.params)
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in range(self.hidden_layers_number):
                W = self.layers[i].W
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn

    def write_model_to_raw(self, file_path):
        # output the model to tmp_path; this format is readable by PDNN
        _nnet2file(self.layers, filename=file_path, input_factor = self.input_dropout_factor, factor = self.dropout_factor)

    def write_model_to_kaldi(self, file_path, with_softmax = True):

        # determine whether it's BNF based on layer sizes
        output_layer_number = -1;
        for layer_index in range(1, self.hidden_layers_number - 1):
            cur_layer_size = self.hidden_layers_sizes[layer_index]
            prev_layer_size = self.hidden_layers_sizes[layer_index-1]
            next_layer_size = self.hidden_layers_sizes[layer_index+1]
            if cur_layer_size < prev_layer_size and cur_layer_size < next_layer_size:
                output_layer_number = layer_index+1; break

        layer_number = len(self.layers)
        if output_layer_number == -1:
            output_layer_number = layer_number

        fout = smart_open(file_path, 'wb')
        for i in range(output_layer_number):
            # decide the dropout factor for this layer
            dropout_factor = 0.0
            if i == 0:
                dropout_factor = self.input_dropout_factor
            if i > 0 and len(self.dropout_factor) > 0:
                dropout_factor = self.dropout_factor[i-1]

            activation_text = '<' + self.cfg.activation_text + '>'
            if i == (layer_number-1) and with_softmax:   # we assume that the last layer is a softmax layer
                activation_text = '<softmax>'
            W_mat = (1.0 - dropout_factor) * self.layers[i].W.get_value()
            b_vec = self.layers[i].b.get_value()
            input_size, output_size = W_mat.shape
            W_layer = []; b_layer = ''
            for rowX in range(output_size):
                W_layer.append('')

            for x in range(input_size):
                for t in range(output_size):
                    W_layer[t] = W_layer[t] + str(W_mat[x][t]) + ' '

            for x in range(output_size):
                b_layer = b_layer + str(b_vec[x]) + ' '

            fout.write('<affinetransform> ' + str(output_size) + ' ' + str(input_size) + '\n')
            fout.write('[' + '\n')
            for x in range(output_size):
                fout.write(W_layer[x].strip() + '\n')
            fout.write(']' + '\n')
            fout.write('[ ' + b_layer.strip() + ' ]' + '\n')
            if activation_text == '<maxout>':
                fout.write(activation_text + ' ' + str(output_size/self.pool_size) + ' ' + str(output_size) + '\n')
            else:
                fout.write(activation_text + ' ' + str(output_size) + ' ' + str(output_size) + '\n')
        fout.close()
Example #17
0
class DNN_Dropout(nnet):

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 adv_activation = None, max_col_norm = None,
                 l1_reg = None, l2_reg = None):

        super(DNN_Dropout, self).__init__()

        self.layers = []
        self.dropout_layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
		
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output
			
            if adv_activation is not None:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation= activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation=activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] ,
                                        activation= activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out

        if self.l1_reg is not None:
            self.__l1Regularization__()

        if self.l2_reg is not None:
            self.__l2Regularization__()


    def save(self,filename,start_layer = 0,max_layer_num = -1,withfinal=True):
        nnet_dict = {}
        if max_layer_num == -1:
           max_layer_num = self.n_layers

        for i in range(start_layer, max_layer_num):
           dict_a = str(i) + ' W'
           if i == 0:
               nnet_dict[dict_a] = _array2string((1.0 - self.input_dropout_factor) * (
                self.layers[i].params[0].get_value()))
           else:
               nnet_dict[dict_a] = _array2string((1.0 - self.dropout_factor[i - 1])* (
                self.layers[i].params[0].get_value()))
           dict_a = str(i) + ' b'
           nnet_dict[dict_a] = _array2string(self.layers[i].params[1].get_value())

        if withfinal: 
            dict_a = 'logreg W'
            nnet_dict[dict_a] = _array2string((1.0 - self.dropout_factor[-1])* (
                self.logLayer.params[0].get_value()))
            dict_a = 'logreg b'
            nnet_dict[dict_a] = _array2string(self.logLayer.params[1].get_value())
   
        with open(filename, 'wb') as fp:
            json.dump(nnet_dict, fp, indent=2, sort_keys = True)
            fp.flush()

    def load(self,filename,start_layer = 0,max_layer_num = -1,withfinal=True):
        nnet_dict = {}
        if max_layer_num == -1:
            max_layer_num = self.n_layers

        with open(filename, 'rb') as fp:
            nnet_dict = json.load(fp)
        
        for i in xrange(max_layer_num):
            dict_key = str(i) + ' W'
            self.layers[i].params[0].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
            dict_key = str(i) + ' b' 
            self.layers[i].params[1].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))

        if withfinal:
            dict_key = 'logreg W'
            self.logLayer.params[0].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
            dict_key = 'logreg b'
            self.logLayer.params[1].set_value(numpy.asarray(_string2array(nnet_dict[dict_key]),
                dtype=theano.config.floatX))
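`save()` and `load()` above lean on two serialisation helpers, `_array2string` and `_string2array`, that are not shown in the excerpt. The pair below is only a self-contained sketch of one way such helpers could round-trip a weight matrix (a shape prefix followed by the flattened values); the actual helpers in the source project may use a different on-disk layout.

import numpy

def _array2string(array):
    # sketch: encode ndim, then the shape, then the flattened values, whitespace-separated
    a = numpy.asarray(array)
    tokens = [str(a.ndim)] + [str(d) for d in a.shape] + [repr(float(v)) for v in a.flatten()]
    return ' '.join(tokens)

def _string2array(string):
    # inverse of the sketch above: read the shape prefix back, then reshape the values
    tokens = string.split()
    ndim = int(tokens[0])
    shape = tuple(int(t) for t in tokens[1:1 + ndim])
    values = numpy.array([float(t) for t in tokens[1 + ndim:]])
    return values.reshape(shape)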
Example #18
0
class DNN_2Tower(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 upper_hidden_layers_sizes=[500, 500], n_outs=10,
                 tower1_hidden_layers_sizes=[500, 500], tower1_n_ins = 100,
                 tower2_hidden_layers_sizes=[500, 500], tower2_n_ins = 100,
                 activation = T.nnet.sigmoid,
                 do_maxout = False, pool_size = 1, 
                 do_pnorm = False, pnorm_order = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        self.tower1_layers = []
        self.tower2_layers = []
        self.upper_layers = []
        # total number of hidden layers (tower1 + tower2 + upper); referenced by the
        # max_col_norm constraint in build_finetune_functions below
        self.n_layers = (len(tower1_hidden_layers_sizes) +
                         len(tower2_hidden_layers_sizes) +
                         len(upper_hidden_layers_sizes))

        self.params = []
        self.delta_params   = []

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        self.tower1_input = self.x[:,0:tower1_n_ins]
        self.tower2_input = self.x[:,tower1_n_ins:(tower1_n_ins + tower2_n_ins)]

        # build tower1
        for i in xrange(len(tower1_hidden_layers_sizes)):
            if i == 0:
                input_size = tower1_n_ins
                layer_input = self.tower1_input
            else:
                input_size = tower1_hidden_layers_sizes[i - 1]
                layer_input = self.tower1_layers[-1].output 

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower1_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower1_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        # build tower2
        for i in xrange(len(tower2_hidden_layers_sizes)):
            if i == 0:
                input_size = tower2_n_ins
                layer_input = self.tower2_input
            else:
                input_size = tower2_hidden_layers_sizes[i - 1]
                layer_input = self.tower2_layers[-1].output     

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower2_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower2_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        for i in xrange(len(upper_hidden_layers_sizes)):
            # construct the sigmoidal layer
            if i == 0:
                input_size = tower1_hidden_layers_sizes[-1] + tower2_hidden_layers_sizes[-1]
                layer_input = T.concatenate([self.tower1_layers[-1].output, self.tower2_layers[-1].output], axis=1)
            else:
                input_size = upper_hidden_layers_sizes[i - 1]
                layer_input = self.upper_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=upper_hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.upper_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.upper_layers[-1].output,
                         n_in=upper_hidden_layers_sizes[-1], n_out=n_outs)

        self.upper_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

#        if self.l1_reg is not None:
#            for i in xrange(self.n_layers):
#                W = self.params[i * 2]
#                self.finetune_cost += self.l1_reg * (abs(W).sum())

#        if self.l2_reg is not None:
#            for i in xrange(self.n_layers):
#                W = self.params[i * 2]
#                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
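
The `updates` dictionary built above encodes classical momentum SGD: each delta accumulates momentum * delta - learning_rate * gradient, and the parameter is then shifted by that delta. A minimal NumPy sketch of the same rule (illustrative, not part of the original listing):

import numpy as np

def momentum_step(params, deltas, grads, learning_rate=1e-4, momentum=0.5):
    # delta <- momentum * delta - learning_rate * grad;  param <- param + delta
    for p, d, g in zip(params, deltas, grads):
        d *= momentum
        d -= learning_rate * g
        p += d

W = np.zeros((3, 2)); dW = np.zeros_like(W); gW = np.ones_like(W)
momentum_step([W], [dW], [gW])   # every entry of W becomes -1e-4
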
Example #19
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 adv_activation = None, max_col_norm = None,
                 l1_reg = None, l2_reg = None):

        super(DNN_Dropout, self).__init__()

        self.layers = []
        self.dropout_layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')
		
        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output
			
            if adv_activation is not None:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation= activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation=activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'],
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] ,
                                        activation= activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction();
        self.features = self.layers[-2].output;
        self.features_dim = self.layers[-2].n_out

        if self.l1_reg is not None:
            self.__l1Regularization__();

        if self.l2_reg is not None:
            self.__l2Regularization__();
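
In this dropout network the dropout path masks units during training, while the companion clean layers rescale the previous layer's output by (1 - dropout_factor). A rough NumPy sketch of that convention (illustrative; it approximates, but is not, the project's _dropout_from_layer helper, and assumes dropout_factor is the probability of dropping a unit):

import numpy as np

def dropout_mask(rng, shape, dropout_factor):
    # 1 keeps a unit, 0 drops it; each unit is dropped with probability dropout_factor
    return rng.binomial(n=1, p=1.0 - dropout_factor, size=shape)

rng = np.random.RandomState(0)
h = np.ones((4, 5))                              # hidden activations
train_h = h * dropout_mask(rng, h.shape, 0.2)    # training path (masked)
test_h = (1.0 - 0.2) * h                         # clean path, scaled as in the code above
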
Example #20
class CNN(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 batch_size = 256, n_outs=500,
                 sparsity = None, sparsity_weight = None, sparse_layer = 3,
                 conv_layer_configs = [],
                 hidden_layers_sizes=[500, 500],
                 conv_activation = T.nnet.sigmoid,
                 full_activation = T.nnet.sigmoid,
                 use_fast = False):

        self.layers = []
        self.params = []
        self.delta_params   = []

        self.sparsity = sparsity
        self.sparsity_weight = sparsity_weight
        self.sparse_layer = sparse_layer

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  
        self.y = T.ivector('y') 
        
        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)

        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.x
                is_input_layer = True
            else:
                input = self.layers[-1].output
                is_input_layer = False
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input, is_input_layer = is_input_layer,
                                   input_shape = config['input_shape'], filter_shape = config['filter_shape'], poolsize = config['poolsize'],
                                   activation = conv_activation, flatten = config['flatten'])
            self.layers.append(conv_layer)
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
            else:
                input_size = hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                               input=self.layers[-1].output,
                               n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        if self.sparsity_weight is not None:
            sparsity_level = T.extra_ops.repeat(self.sparsity, 630)
            avg_act = self.layers[sparse_layer].output.mean(axis=0)
            kl_div = self.kl_divergence(sparsity_level, avg_act)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) + self.sparsity_weight * kl_div.sum()
        else:
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}

        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
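
The optional sparsity term in this CNN penalises hidden units whose mean activation drifts away from the target sparsity, using the Bernoulli KL divergence defined in kl_divergence. A small NumPy check of the same formula with illustrative values:

import numpy as np

def kl_divergence(p, p_hat):
    # element-wise KL(p || p_hat) between Bernoulli distributions
    return p * np.log(p / p_hat) + (1 - p) * np.log((1 - p) / (1 - p_hat))

p = 0.05                                  # target sparsity level
p_hat = np.array([0.05, 0.2, 0.5])        # mean activations of three units
penalty = kl_divergence(p, p_hat).sum()   # zero only when every p_hat equals p
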
Example #21
class DNN_2Tower(object):
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 upper_hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 tower1_hidden_layers_sizes=[500, 500],
                 tower1_n_ins=100,
                 tower2_hidden_layers_sizes=[500, 500],
                 tower2_n_ins=100,
                 activation=T.nnet.sigmoid,
                 do_maxout=False,
                 pool_size=1,
                 do_pnorm=False,
                 pnorm_order=1,
                 max_col_norm=None,
                 l1_reg=None,
                 l2_reg=None):

        self.tower1_layers = []
        self.tower2_layers = []
        self.upper_layers = []

        self.params = []
        self.delta_params = []

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        self.tower1_input = self.x[:, 0:tower1_n_ins]
        self.tower2_input = self.x[:,
                                   tower1_n_ins:(tower1_n_ins + tower2_n_ins)]

        # build tower1
        for i in xrange(len(tower1_hidden_layers_sizes)):
            if i == 0:
                input_size = tower1_n_ins
                layer_input = self.tower1_input
            else:
                input_size = tower1_hidden_layers_sizes[i - 1]
                layer_input = self.tower1_layers[-1].output

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower1_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower1_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        # build tower2
        for i in xrange(len(tower2_hidden_layers_sizes)):
            if i == 0:
                input_size = tower2_n_ins
                layer_input = self.tower2_input
            else:
                input_size = tower2_hidden_layers_sizes[i - 1]
                layer_input = self.tower2_layers[-1].output

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower2_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower2_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        for i in xrange(len(upper_hidden_layers_sizes)):
            # construct the sigmoidal layer
            if i == 0:
                input_size = tower1_hidden_layers_sizes[
                    -1] + tower2_hidden_layers_sizes[-1]
                layer_input = T.concatenate([
                    self.tower1_layers[-1].output,
                    self.tower2_layers[-1].output
                ],
                                            axis=1)
            else:
                input_size = upper_hidden_layers_sizes[i - 1]
                layer_input = self.upper_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=upper_hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.upper_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.upper_layers[-1].output,
                                           n_in=upper_hidden_layers_sizes[-1],
                                           n_out=n_outs)

        self.upper_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

#        if self.l1_reg is not None:
#            for i in xrange(self.n_layers):
#                W = self.params[i * 2]
#                self.finetune_cost += self.l1_reg * (abs(W).sum())

#        if self.l2_reg is not None:
#            for i in xrange(self.n_layers):
#                W = self.params[i * 2]
#                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy,
                                 batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam * learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms /
                                              (1e-7 + col_norms))

        train_fn = theano.function(
            inputs=[
                index,
                theano.Param(learning_rate, default=0.0001),
                theano.Param(momentum, default=0.5)
            ],
            outputs=self.errors,
            updates=updates,
            givens={
                self.x:
                train_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                train_set_y[index * batch_size:(index + 1) * batch_size]
            })

        valid_fn = theano.function(
            inputs=[index],
            outputs=self.errors,
            givens={
                self.x:
                valid_set_x[index * batch_size:(index + 1) * batch_size],
                self.y:
                valid_set_y[index * batch_size:(index + 1) * batch_size]
            })

        return train_fn, valid_fn
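
The givens substitutions above simply hand each compiled function the index-th minibatch of the shared data. A plain NumPy sketch of that slicing (illustrative; the real data lives in Theano shared variables):

import numpy as np

def minibatch(data_x, data_y, index, batch_size):
    lo, hi = index * batch_size, (index + 1) * batch_size
    return data_x[lo:hi], data_y[lo:hi]

x = np.arange(20).reshape(10, 2)
y = np.arange(10)
bx, by = minibatch(x, y, index=1, batch_size=4)   # rows 4..7 of x and y
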
Example #22
    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,  # the network configuration
                 dnn_shared = None, shared_layers=[], input = None, draw=None):

        self.cfg = cfg
        self.params = []
        self.delta_params   = []
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size
        self.max_col_norm = 1
        print self.max_col_norm

        self.layers = []
        self.lstm_layers = []
        self.fc_layers = []
        self.bilayers = []

        # 1. lstm
        self.lstm_layers_sizes = cfg.lstm_layers_sizes
        self.lstm_layers_number = len(self.lstm_layers_sizes)
        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation


        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        if input == None:
            self.x = T.matrix('x')
        else:
            self.x = input 
        self.y = T.ivector('y')

        #######################
        # build lstm layers   #
        #######################
        print '1. start to build AttendLSTMLayer : '+ str(self.lstm_layers_number) + ', n_attendout: '+ str(cfg.batch_size)
        for i in xrange(1):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.lstm_layers_sizes[i - 1]
                input = self.bilayers[-1].output

            # Forward
            f_lstm_layer = AttendLSTMLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.lstm_layers_sizes[i],
                                                steps=cfg.batch_size, draw=draw)
            print '\tbuild f_lstm layer: ' + str(input_size) +' x '+ str(f_lstm_layer.n_out)
            self.layers.append(f_lstm_layer)
            self.lstm_layers.append(f_lstm_layer)
            self.params.extend(f_lstm_layer.params)
            self.delta_params.extend(f_lstm_layer.delta_params)

            # Backward
            b_lstm_layer = AttendLSTMLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.lstm_layers_sizes[i], backwards=True,
                                                steps=cfg.batch_size, draw=draw)
            print '\tbuild b_lstm layer: ' + str(input_size) +' x '+ str(b_lstm_layer.n_out)
            self.layers.append(b_lstm_layer)
            self.lstm_layers.append(b_lstm_layer)
            self.params.extend(b_lstm_layer.params)
            self.delta_params.extend(b_lstm_layer.delta_params)

            # Sum forward + backward
            bi_layer = SUMLayer(finput=f_lstm_layer.output,binput=b_lstm_layer.output[::-1], n_out=self.lstm_layers_sizes[i - 1])
            self.bilayers.append(bi_layer)
            print '\tbuild sum layer: ' + str(input_size) +' x '+ str(bi_layer.n_out)

        print '1. finish AttendLSTMLayer: '+ str(self.bilayers[-1].n_out)

        #######################
        # build log layers   #
        #######################
        print '3. start to build log layer: 1'
        input_size = self.bilayers[-1].n_out
        input = self.bilayers[-1].output
        logLayer = LogisticRegression(input=input, n_in=input_size, n_out=self.n_outs)
        print '\tbuild final layer: ' + str(input_size) +' x '+ str(self.n_outs)
        self.layers.append(logLayer)
        self.params.extend(logLayer.params)
        self.delta_params.extend(logLayer.delta_params)
        print '3. finish log layer: '+ str(self.bilayers[-1].n_out)
        print 'Total layers: '+ str(len(self.layers))

        sys.stdout.flush()

        self.finetune_cost = logLayer.negative_log_likelihood(self.y)
        self.errors = logLayer.errors(self.y)
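
The forward and backward AttendLSTMLayer outputs are combined by SUMLayer, with the backward stream flipped back into time order before the sum. A NumPy sketch of that combination (shapes are assumed for illustration):

import numpy as np

steps, n_out = 6, 4
f_out = np.random.rand(steps, n_out)   # forward LSTM output, in time order
b_out = np.random.rand(steps, n_out)   # backward LSTM output, reversed in time
bi_out = f_out + b_out[::-1]           # SUMLayer-style combination
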
Example #23
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 upper_hidden_layers_sizes=[500, 500],
                 n_outs=10,
                 tower1_hidden_layers_sizes=[500, 500],
                 tower1_n_ins=100,
                 tower2_hidden_layers_sizes=[500, 500],
                 tower2_n_ins=100,
                 activation=T.nnet.sigmoid,
                 do_maxout=False,
                 pool_size=1,
                 do_pnorm=False,
                 pnorm_order=1,
                 max_col_norm=None,
                 l1_reg=None,
                 l2_reg=None):

        self.tower1_layers = []
        self.tower2_layers = []
        self.upper_layers = []

        self.params = []
        self.delta_params = []

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        self.tower1_input = self.x[:, 0:tower1_n_ins]
        self.tower2_input = self.x[:,
                                   tower1_n_ins:(tower1_n_ins + tower2_n_ins)]

        # build tower1
        for i in xrange(len(tower1_hidden_layers_sizes)):
            if i == 0:
                input_size = tower1_n_ins
                layer_input = self.tower1_input
            else:
                input_size = tower1_hidden_layers_sizes[i - 1]
                layer_input = self.tower1_layers[-1].output

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower1_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower1_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        # build tower2
        for i in xrange(len(tower2_hidden_layers_sizes)):
            if i == 0:
                input_size = tower2_n_ins
                layer_input = self.tower2_input
            else:
                input_size = tower2_hidden_layers_sizes[i - 1]
                layer_input = self.tower2_layers[-1].output

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower2_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower2_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        for i in xrange(len(upper_hidden_layers_sizes)):
            # construct the sigmoidal layer
            if i == 0:
                input_size = tower1_hidden_layers_sizes[
                    -1] + tower2_hidden_layers_sizes[-1]
                layer_input = T.concatenate([
                    self.tower1_layers[-1].output,
                    self.tower2_layers[-1].output
                ],
                                            axis=1)
            else:
                input_size = upper_hidden_layers_sizes[i - 1]
                layer_input = self.upper_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=upper_hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.upper_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.upper_layers[-1].output,
                                           n_in=upper_hidden_layers_sizes[-1],
                                           n_out=n_outs)

        self.upper_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
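
As in the DNN_2Tower class earlier, the input columns are split into per-tower slices and the towers' top hidden outputs are concatenated before the upper layers. A NumPy sketch of that wiring (the towers themselves are stood in by identity maps):

import numpy as np

tower1_n_ins, tower2_n_ins = 3, 4
x = np.random.rand(8, tower1_n_ins + tower2_n_ins)
tower1_input = x[:, 0:tower1_n_ins]
tower2_input = x[:, tower1_n_ins:tower1_n_ins + tower2_n_ins]
h1, h2 = tower1_input, tower2_input               # stand-ins for the tower outputs
upper_input = np.concatenate([h1, h2], axis=1)    # fed to the upper layers
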
Example #24
File: cnn_sat.py Project: xczhanjun/pdnn
    def __init__(self,
                 numpy_rng,
                 theano_rng=None,
                 batch_size=256,
                 n_outs=500,
                 conv_layer_configs=[],
                 hidden_layers_sizes=[500, 500],
                 ivec_layers_sizes=[500, 500],
                 conv_activation=T.nnet.sigmoid,
                 full_activation=T.nnet.sigmoid,
                 use_fast=False,
                 update_part=[0, 1],
                 ivec_dim=100):

        self.conv_layers = []
        self.full_layers = []
        self.ivec_layers = []

        self.params = []
        self.delta_params = []

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        input_shape = conv_layer_configs[0]['input_shape']
        n_ins = input_shape[-1] * input_shape[-2] * input_shape[-3]

        self.iv = self.x[:, n_ins:n_ins + ivec_dim]
        self.raw = self.x[:, 0:n_ins]

        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        # construct the adaptation NN
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(sigmoid_layer)
            if 0 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)

        linear_func = lambda x: x
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=self.ivec_layers[-1].output,
                                    n_in=ivec_layers_sizes[-1],
                                    n_out=n_ins,
                                    activation=linear_func)
        self.ivec_layers.append(sigmoid_layer)
        if 0 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.raw + self.ivec_layers[-1].output
            else:
                input = self.conv_layers[-1].output
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng,
                                   input=input,
                                   input_shape=config['input_shape'],
                                   filter_shape=config['filter_shape'],
                                   poolsize=config['poolsize'],
                                   activation=conv_activation,
                                   flatten=config['flatten'],
                                   use_fast=use_fast)
            self.conv_layers.append(conv_layer)
            if 1 in update_part:
                self.params.extend(conv_layer.params)
                self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config[
            'output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
                layer_input = self.conv_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.full_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.full_layers.append(sigmoid_layer)
            if 1 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.full_layers[-1].output,
                                           n_in=hidden_layers_sizes[-1],
                                           n_out=n_outs)
        self.full_layers.append(self.logLayer)
        if 1 in update_part:
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)
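
The speaker-adaptation idea in this cnn_sat example: the trailing ivec_dim columns of the input drive a small adaptation network whose final linear layer produces an n_ins-dimensional shift that is added to the raw features before the convolutional front end. A NumPy sketch of that data flow (dimensions are assumed, and the adaptation MLP is replaced by one random linear map):

import numpy as np

n_ins, ivec_dim = 360, 100
x = np.random.rand(16, n_ins + ivec_dim)
raw, iv = x[:, 0:n_ins], x[:, n_ins:n_ins + ivec_dim]
adapt_shift = np.dot(iv, np.random.randn(ivec_dim, n_ins))   # stand-in for the adaptation net
conv_input = raw + adapt_shift                               # analogous to raw + ivec_layers[-1].output
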
Example #25
class CNN(CNNBase):
	""" Instantiation of Convolution neural network ... """
	def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs, hidden_layer_configs, 
			use_fast=False,conv_activation = T.nnet.sigmoid,hidden_activation = T.nnet.sigmoid,
			l1_reg=None,l2_reg=None,max_col_norm=None):

		super(CNN, self).__init__(conv_layer_configs, hidden_layer_configs,l1_reg,l2_reg,max_col_norm)
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
			            
		for i in xrange(self.conv_layer_num):		# construct the convolution layer
			if i == 0:  				#is_input layer
				input = self.x
				is_input_layer = True
			else:
				input = self.layers[-1].output #output of previous layer
				is_input_layer = False
			config = conv_layer_configs[i]
	
			conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,input_shape=config['input_shape'],
				filter_shape=config['filter_shape'],poolsize=config['poolsize'],
				activation = conv_activation, use_fast = use_fast)
			self.layers.append(conv_layer)
			self.conv_layers.append(conv_layer)
			if config['update']==True:	# only some convolution layers are updated
				self.params.extend(conv_layer.params)
				self.delta_params.extend(conv_layer.delta_params)

		hidden_layers = hidden_layer_configs['hidden_layers'];
		self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
		adv_activation_configs = hidden_layer_configs['adv_activation'] 
		#flattening the last convolution output layer
		self.features = self.conv_layers[-1].output.flatten(2);
		self.features_dim = self.conv_output_dim;
		
		for i in xrange(self.hidden_layer_num):		# construct the hidden layer
			if i == 0:				# is the first sigmoidal layer
				input_size = self.conv_output_dim
				layer_input = self.features
			else:
				input_size = hidden_layers[i - 1]	# number of hidden neurons in previous layers
				layer_input = self.layers[-1].output
			
			
			if adv_activation_configs is None:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation);
						
			else:
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order']);
						
						
			self.layers.append(sigmoid_layer)
			self.mlp_layers.append(sigmoid_layer)

			if config['update']==True:	# only some hidden layers are updated
				self.params.extend(sigmoid_layer.params)
				self.delta_params.extend(sigmoid_layer.delta_params)
           

		self.logLayer = LogisticRegression(input=self.layers[-1].output,n_in=hidden_layers[-1],n_out=n_outs)
		
		self.layers.append(self.logLayer)
		self.params.extend(self.logLayer.params)
		self.delta_params.extend(self.logLayer.delta_params)
		
		self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
		self.errors = self.logLayer.errors(self.y)
		self.output = self.logLayer.prediction()
		
		#regularization
		if self.l1_reg is not None:
			self.__l1Regularization__(self.hidden_layer_num*2);
		if self.l2_reg is not None:
			self.__l2Regularization__(self.hidden_layer_num*2);
		
		
	def save_mlp2dict(self,withfinal=True,max_layer_num=-1):
		if max_layer_num == -1:
		   max_layer_num = self.hidden_layer_num
		mlp_dict = {}
		for i in range(max_layer_num):
			dict_a = str(i) +' W'
			mlp_dict[dict_a] = _array2string(self.mlp_layers[i].params[0].get_value())
			dict_a = str(i) + ' b'
			mlp_dict[dict_a] = _array2string(self.mlp_layers[i].params[1].get_value())

		if withfinal: 
			dict_a = 'logreg W'
			mlp_dict[dict_a] = _array2string(self.logLayer.params[0].get_value())
			dict_a = 'logreg b'
			mlp_dict[dict_a] = _array2string(self.logLayer.params[1].get_value())
		return mlp_dict
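
A hypothetical use of save_mlp2dict, assuming cnn is a trained instance of the CNN class above; json is used here only to show that the returned dict (keys such as '0 W', '0 b', 'logreg W', 'logreg b' mapped to stringified arrays) is easy to persist:

import json

def dump_mlp(cnn, path='mlp.json'):
    # cnn is assumed to be a trained instance of the CNN class above
    mlp_dict = cnn.save_mlp2dict(withfinal=True)
    with open(path, 'w') as f:
        json.dump(mlp_dict, f)
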
Example #26
    def __init__(self, numpy_rng, theano_rng=None,
                 batch_size = 256, n_outs=500,
                 conv_layer_configs = [],
                 hidden_layers_sizes=[500, 500],
                 ivec_layers_sizes=[500, 500],
                 conv_activation = T.nnet.sigmoid,
                 full_activation = T.nnet.sigmoid,
                 use_fast = False,
                 update_part = [0, 1],
                 ivec_dim = 100):

        self.conv_layers = []
        self.full_layers = []
        self.ivec_layers = [] 
        
        self.params = []
        self.delta_params   = []

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  
        self.y = T.ivector('y') 
       
        input_shape = conv_layer_configs[0]['input_shape']
        n_ins = input_shape[-1] * input_shape[-2] * input_shape[-3]

        self.iv = self.x[:,n_ins:n_ins+ivec_dim]
        self.raw = self.x[:,0:n_ins]
 
        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)
        self.ivec_layer_num = len(ivec_layers_sizes)

        # construct the adaptation NN
        for i in xrange(self.ivec_layer_num):
            if i == 0:
                input_size = ivec_dim
                layer_input = self.iv
            else:
                input_size = ivec_layers_sizes[i - 1]
                layer_input = self.ivec_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=ivec_layers_sizes[i],
                                        activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.ivec_layers.append(sigmoid_layer)
            if 0 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)

        linear_func = lambda x: x
        sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                    input=self.ivec_layers[-1].output,
                                    n_in=ivec_layers_sizes[-1],
                                    n_out=n_ins,
                                    activation=linear_func)
        self.ivec_layers.append(sigmoid_layer)
        if 0 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)


        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.raw + self.ivec_layers[-1].output 
            else:
                input = self.conv_layers[-1].output
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                                   input_shape = config['input_shape'], filter_shape = config['filter_shape'], poolsize = config['poolsize'],
                                   activation = conv_activation, flatten = config['flatten'], use_fast = use_fast)
            self.conv_layers.append(conv_layer)
            if 1 in update_part:
                self.params.extend(conv_layer.params)
                self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
                layer_input = self.conv_layers[-1].output
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.full_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.full_layers.append(sigmoid_layer)
            if 1 in update_part:
                self.params.extend(sigmoid_layer.params)
                self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                               input=self.full_layers[-1].output,
                               n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.full_layers.append(self.logLayer)
        if 1 in update_part:
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)
Example #27
	def __init__(self, numpy_rng, theano_rng, batch_size, n_outs,conv_layer_configs, hidden_layer_configs, 
			use_fast=False,conv_activation = T.nnet.sigmoid,hidden_activation = T.nnet.sigmoid,
			l1_reg=None,l2_reg=None,max_col_norm=None):

		super(DropoutCNN, self).__init__(conv_layer_configs,hidden_layer_configs,l1_reg,l2_reg,max_col_norm)
		
		if not theano_rng:
			theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
            
		for i in xrange(self.conv_layer_num):		# construct the convolution layer
			if i == 0:  				#is_input layer
				input = self.x
				is_input_layer = True
			else:
				input = self.layers[-1].output #output of previous layer
				is_input_layer = False
			config = conv_layer_configs[i]
	
			conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,input_shape=config['input_shape'],
				filter_shape=config['filter_shape'],poolsize=config['poolsize'],
				activation = conv_activation, use_fast = use_fast)
			self.layers.append(conv_layer)
			self.conv_layers.append(conv_layer)
			if config['update']==True:	# only some convolution layers are updated
				self.params.extend(conv_layer.params)
				self.delta_params.extend(conv_layer.delta_params)

		hidden_layers = hidden_layer_configs['hidden_layers'];
		self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]
		adv_activation_configs = hidden_layer_configs['adv_activation'] 
		
		#flattening the last convolution output layer
		self.features = self.conv_layers[-1].output.flatten(2);
		self.features_dim = self.conv_output_dim;

		self.dropout_layers = [];
		self.dropout_factor = hidden_layer_configs['dropout_factor'];
		self.input_dropout_factor = hidden_layer_configs['input_dropout_factor'];
		
		for i in xrange(self.hidden_layer_num):		# construct the hidden layer
			if i == 0:				# is the first sigmoidal layer
				input_size = self.conv_output_dim
				if self.dropout_factor[i] > 0.0:
					dropout_layer_input = _dropout_from_layer(theano_rng, self.layers[-1].output, self.input_dropout_factor)
				else:
					dropout_layer_input = self.features
				layer_input = self.features
			else:
				input_size = hidden_layers[i - 1]	# number of hidden neurons in previous layers
				dropout_layer_input = self.dropout_layers[-1].dropout_output			
				layer_input = (1 - self.dropout_factor[i-1]) * self.layers[-1].output
				
			if adv_activation_configs is None:
				dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation,
						dropout_factor = self.dropout_factor[i]);
						
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i], activation=hidden_activation,
						W=dropout_sigmoid_layer.W, b=dropout_sigmoid_layer.b);
										
						
			else:
				dropout_sigmoid_layer = DropoutHiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order'],
						dropout_factor = self.dropout_factor[i]);
						
				sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,n_in=input_size, 
						n_out = hidden_layers[i]*adv_activation_configs['pool_size'], activation=hidden_activation,
						adv_activation_method = adv_activation_configs['method'],
						pool_size = adv_activation_configs['pool_size'],
						pnorm_order = adv_activation_configs['pnorm_order'],
						W=dropout_sigmoid_layer.W, b=dropout_sigmoid_layer.b);
						
			self.layers.append(sigmoid_layer)
			self.dropout_layers.append(dropout_sigmoid_layer)
			self.mlp_layers.append(sigmoid_layer)

			if config['update']==True:	# only some hidden layers are updated
				self.params.extend(dropout_sigmoid_layer.params)
				self.delta_params.extend(dropout_sigmoid_layer.delta_params)

		self.dropout_logLayer = LogisticRegression(input=self.dropout_layers[-1].dropout_output,n_in=hidden_layers[-1],n_out=n_outs)
		self.logLayer = LogisticRegression(
							input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
							n_in=hidden_layers[-1],n_out=n_outs,
							W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)
		
		self.dropout_layers.append(self.dropout_logLayer)
		self.layers.append(self.logLayer)
		self.params.extend(self.dropout_logLayer.params)
		self.delta_params.extend(self.dropout_logLayer.delta_params)
		
		self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
		self.errors = self.logLayer.errors(self.y)
		self.output = self.logLayer.prediction()
		
		#regularization
		if self.l1_reg is not None:
			self.__l1Regularization__(self.hidden_layer_num*2);
		if self.l2_reg is not None:
			self.__l2Regularization__(self.hidden_layer_num*2);
Example #28
File: dnn.py Project: synetkim/multi_asr
    def __init__(
            self,
            numpy_rng,
            theano_rng=None,
            cfg=None,  # the network configuration
            dnn_shared=None,
            shared_layers=[],
            input=None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins
        self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout
        self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        self.non_updated_layers = cfg.non_updated_layers

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2**30))
        # allocate symbolic variables for the data
        if input == None:
            self.x = T.matrix('x')
        else:
            self.x = input
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None
            b = None
            if (i in shared_layers):
                W = dnn_shared.layers[i].W
                b = dnn_shared.layers[i].b
            if self.do_maxout == True:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i] *
                                           self.pool_size,
                                           W=W,
                                           b=b,
                                           activation=(lambda x: 1.0 * x),
                                           do_maxout=True,
                                           pool_size=self.pool_size)
            else:
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                           input=layer_input,
                                           n_in=input_size,
                                           n_out=self.hidden_layers_sizes[i],
                                           W=W,
                                           b=b,
                                           activation=self.activation)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            # if the layer index is included in self.non_updated_layers, parameters of this layer will not be updated
            if (i not in self.non_updated_layers):
                self.params.extend(hidden_layer.params)
                self.delta_params.extend(hidden_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(input=self.layers[-1].output,
                                           n_in=self.hidden_layers_sizes[-1],
                                           n_out=self.n_outs)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
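
The last two blocks add L1 and L2 penalties on the hidden-layer weight matrices to finetune_cost. The same arithmetic in NumPy (illustrative, outside any Theano graph):

import numpy as np

def regularised_cost(base_cost, weights, l1_reg=None, l2_reg=None):
    cost = base_cost
    for W in weights:
        if l1_reg is not None:
            cost += l1_reg * np.abs(W).sum()     # L1: sum of absolute weights
        if l2_reg is not None:
            cost += l2_reg * np.square(W).sum()  # L2: sum of squared weights
    return cost
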
Example #29
class DNN_Dropout(object):

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid,
                 input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 do_maxout = False, pool_size = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        self.sigmoid_layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params   = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.sigmoid_layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            if do_maxout == False:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation= (lambda x: 1.0*x),
                                        dropout_factor=self.dropout_factor[i],
                                        do_maxout = True, pool_size = pool_size)
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation= (lambda x: 1.0*x),
                                        W=dropout_layer.W, b=dropout_layer.b,
                                        do_maxout = True, pool_size = pool_size)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.sigmoid_layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = {}
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        if self.max_col_norm is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                if W in updates:
                    updated_W = updates[W]
                    col_norms = T.sqrt(T.sum(T.sqr(updated_W), axis=0))
                    desired_norms = T.clip(col_norms, 0, self.max_col_norm)
                    updates[W] = updated_W * (desired_norms / (1e-7 + col_norms))

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
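
The train_fn/valid_fn pair returned above is meant to be driven by an ordinary Python loop. A minimal driver sketch under stated assumptions: the class name DNN_Dropout, the 440/1024/1947 sizes, and the pre-loaded Theano shared variables train_set_x/y and valid_set_x/y are placeholders, not part of the snippet above.

import numpy

# hypothetical setup -- class name, sizes and dataset shared variables are placeholders
numpy_rng = numpy.random.RandomState(1234)
dnn = DNN_Dropout(numpy_rng=numpy_rng, n_ins=440,
                  hidden_layers_sizes=[1024, 1024], n_outs=1947)

batch_size = 256
train_fn, valid_fn = dnn.build_finetune_functions(
        (train_set_x, train_set_y),   # Theano shared variables prepared elsewhere
        (valid_set_x, valid_set_y),
        batch_size=batch_size)

n_train_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] / batch_size

for epoch in xrange(10):
    # one sweep of momentum SGD; learning_rate and momentum override the defaults
    train_err = [train_fn(i, learning_rate=0.08, momentum=0.5)
                 for i in xrange(n_train_batches)]
    valid_err = [valid_fn(i) for i in xrange(n_valid_batches)]
    print 'epoch %d: train error %.4f, valid error %.4f' % (
        epoch, numpy.mean(train_err), numpy.mean(valid_err))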
Example #30
File: dnn.py Project: chagge/kaldiproj
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid,
                 do_maxout = False, pool_size = 1, 
                 do_pnorm = False, pnorm_order = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        self.sigmoid_layers = []
        self.params = []
        self.delta_params   = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.sigmoid_layers[-1].output

            if do_maxout == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_maxout = True, pool_size = pool_size)
            elif do_pnorm == True:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation = (lambda x: 1.0*x),
                                        do_pnorm = True, pool_size = pool_size, pnorm_order = pnorm_order)
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.sigmoid_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
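
When do_maxout (or do_pnorm) is set, each hidden layer above is given hidden_layers_sizes[i] * pool_size linear outputs and a linear activation, and the grouping/pooling itself happens inside HiddenLayer. A small numpy sketch of the maxout pooling step, assuming the conventional contiguous grouping (the exact grouping used by HiddenLayer is not shown in this snippet):

import numpy

pool_size = 3
n_hidden = 4                                                # plays the role of hidden_layers_sizes[i]
linear_out = numpy.random.randn(2, n_hidden * pool_size)    # (batch=2, 12) linear activations

# take the max over non-overlapping groups of pool_size consecutive units
maxout_out = linear_out.reshape(2, n_hidden, pool_size).max(axis=2)
print maxout_out.shape                                      # (2, 4): back to hidden_layers_sizes[i] units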
Example #31
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid,
                 input_dropout_factor = 0,
                 dropout_factor = [0.2,0.2,0.2,0.2,0.2,0.2,0.2],
                 do_maxout = False, pool_size = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        self.sigmoid_layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params   = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        self.input_dropout_factor = input_dropout_factor
        self.dropout_factor = dropout_factor

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
                if input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.sigmoid_layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            if do_maxout == False:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation= activation,
                                        dropout_factor=self.dropout_factor[i])
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation= (lambda x: 1.0*x),
                                        dropout_factor=self.dropout_factor[i],
                                        do_maxout = True, pool_size = pool_size)
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * pool_size,
                                        activation= (lambda x: 1.0*x),
                                        W=dropout_layer.W, b=dropout_layer.b,
                                        do_maxout = True, pool_size = pool_size)
            # add the layer to our list of layers
            self.sigmoid_layers.append(sigmoid_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.sigmoid_layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.sigmoid_layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.n_layers):
                W = self.params[i * 2]
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
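
_dropout_from_layer is called above but not defined in this snippet. In PDNN-style code it is typically a binomial mask applied elementwise to a layer's output; a hedged sketch of such a helper (the project's own implementation may differ in details):

import theano

def _dropout_from_layer(theano_rng, layer_output, dropout_factor):
    # keep each unit with probability (1 - dropout_factor), zero it otherwise
    mask = theano_rng.binomial(n=1, p=1 - dropout_factor,
                               size=layer_output.shape,
                               dtype=theano.config.floatX)
    return layer_output * mask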
Example #32
File: drn.py Project: Beronx86/pdnn
    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,  # the network configuration
                 dnn_shared = None, shared_layers=[], input = None):

        self.layers = []
        self.params = []
        self.delta_params = []

        self.rnn_layerX = 2
        print "Use DRN"

        self.cfg = cfg
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input 
        self.y = T.ivector('y')

        for i in xrange(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            W = None; b = None
            if (i in shared_layers) :
                W = dnn_shared.layers[i].W; b = dnn_shared.layers[i].b
            if i == self.rnn_layerX:
                hidden_layer = RnnLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation=self.activation) 
            else:
                if self.do_maxout == True:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        W = W, b = b,
                                        activation = (lambda x: 1.0*x),
                                        do_maxout = True, pool_size = self.pool_size)
                else:
                    hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation=self.activation)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.params.extend(hidden_layer.params)
            self.delta_params.extend(hidden_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs)

        if self.n_outs > 0:
            self.layers.append(self.logLayer)
            self.params.extend(self.logLayer.params)
            self.delta_params.extend(self.logLayer.delta_params)
       
        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in xrange(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
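
The dnn_shared/shared_layers arguments above let a second network tie selected hidden layers to an existing network's W and b shared variables rather than allocating new ones. A minimal sketch, assuming this __init__ belongs to a class named DRN and that a suitable cfg object has been built elsewhere (both assumptions):

import numpy

numpy_rng = numpy.random.RandomState(89677)

base_net = DRN(numpy_rng=numpy_rng, cfg=cfg)          # cfg prepared elsewhere
# second network whose first two hidden layers reuse base_net's W and b
# (the same Theano shared variables, so updates to one are seen by the other)
tied_net = DRN(numpy_rng=numpy_rng, cfg=cfg,
               dnn_shared=base_net, shared_layers=[0, 1])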
Example #33
class DBN(nnet):
    """Deep Belief Network

    A deep belief network is obtained by stacking several RBMs on top of each
    other. The hidden layer of the RBM at layer `i` becomes the input of the
    RBM at layer `i+1`. The first layer RBM gets as input the input of the
    network, and the hidden layer of the last RBM represents the output. When
    used for classification, the DBN is treated as a MLP, by adding a logistic
    regression layer on top.
    """

    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 first_layer_gb = True,pretrainedLayers=None,activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: whether the first layer is Gaussian-Bernoulli or
                                Bernoulli-Bernoulli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers is None:
            self.nPreTrainLayers = self.n_layers
        else:
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output


            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN. 
            # The visible biases in the RBM are parameters of those RBMs, 
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)            

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out

    def pretraining_functions(self, train_set_x, batch_size, weight_cost):
        '''Generates a list of functions, for performing one step of
        gradient descent at a given layer. The function will require
        as input the minibatch index, and to train an RBM you just
        need to iterate, calling the corresponding function on all
        minibatch indexes.

        :type train_set_x: theano.tensor.TensorType
        :param train_set_x: Shared var. that contains all datapoints used
                            for training the RBM
        :type batch_size: int
        :param batch_size: size of a [mini]batch
        :param weight_cost: weight cost

        '''

        # index to a [mini]batch
        index = T.lscalar('index')  # index to a minibatch
        momentum = T.scalar('momentum')
        learning_rate = T.scalar('lr')  # learning rate to use

        # number of batches
        n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size
        # beginning of a batch, given `index`
        batch_begin = index * batch_size
        # ending of a batch given `index`
        batch_end = batch_begin + batch_size

        pretrain_fns = []
        for rbm in self.rbm_layers:

            # get the cost and the updates list
            # using CD-k here (persistent=None, k=1) for training each RBM.
            r_cost, fe_cost, updates = rbm.get_cost_updates(batch_size, learning_rate,
                                                            momentum, weight_cost)
            # compile the theano function
            fn = theano.function(inputs=[index,
                              theano.Param(learning_rate, default=0.0001),
                              theano.Param(momentum, default=0.5)],
                              outputs= [r_cost, fe_cost],
                              updates=updates,
                              givens={self.x: train_set_x[batch_begin:batch_end]})
            # append function to the list of functions
            pretrain_fns.append(fn)

        return pretrain_fns
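
pretraining_functions returns one compiled Theano function per RBM, so layer-wise pretraining reduces to looping over layers, epochs and minibatch indices. A driver sketch under stated assumptions (the constructor sizes and the shared variable train_set_x are placeholders; the lr/momentum keyword names come from the scalar names used above):

import numpy

numpy_rng = numpy.random.RandomState(123)
dbn = DBN(numpy_rng=numpy_rng, n_ins=784,
          hidden_layers_sizes=[1024, 1024], n_outs=10)

batch_size = 128
pretrain_fns = dbn.pretraining_functions(train_set_x=train_set_x,
                                         batch_size=batch_size,
                                         weight_cost=0.0002)
n_batches = train_set_x.get_value(borrow=True).shape[0] / batch_size

for layer_index in xrange(dbn.nPreTrainLayers):
    for epoch in xrange(10):
        r_costs = []
        for batch_index in xrange(n_batches):
            # one CD-1 update of this layer's RBM on one minibatch
            r_cost, fe_cost = pretrain_fns[layer_index](batch_index,
                                                        lr=0.08, momentum=0.5)
            r_costs.append(r_cost)
        print 'layer %d, epoch %d, mean reconstruction cost %f' % (
            layer_index, epoch, numpy.mean(r_costs))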
Example #34
    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,
                 dnn_shared = None, shared_layers=[]):

        self.layers = []
        self.dropout_layers = []
        self.params = []
        self.delta_params   = []

        self.cfg = cfg
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size
        self.input_dropout_factor = cfg.input_dropout_factor; self.dropout_factor = cfg.dropout_factor

        self.max_col_norm = cfg.max_col_norm
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')
        self.y = T.ivector('y')

        for i in range(self.hidden_layers_number):
            # construct the hidden layer
            if i == 0:
                input_size = self.n_ins
                layer_input = self.x
                if self.input_dropout_factor > 0.0:
                    dropout_layer_input = _dropout_from_layer(theano_rng, self.x, self.input_dropout_factor)
                else:
                    dropout_layer_input = self.x
            else:
                input_size = self.hidden_layers_sizes[i - 1]
                layer_input = (1 - self.dropout_factor[i - 1]) * self.layers[-1].output
                dropout_layer_input = self.dropout_layers[-1].dropout_output

            W = None; b = None
            if (i in shared_layers) :
                W = dnn_shared.layers[i].W; b = dnn_shared.layers[i].b

            if self.do_maxout == False:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        W = W, b = b,
                                        activation= self.activation,
                                        dropout_factor=self.dropout_factor[i])
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i],
                                        activation= self.activation,
                                        W=dropout_layer.W, b=dropout_layer.b)
            else:
                dropout_layer = DropoutHiddenLayer(rng=numpy_rng,
                                        input=dropout_layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        W = W, b = b,
                                        activation= (lambda x: 1.0*x),
                                        dropout_factor=self.dropout_factor[i],
                                        do_maxout = True, pool_size = self.pool_size)
                hidden_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=self.hidden_layers_sizes[i] * self.pool_size,
                                        activation= (lambda x: 1.0*x),
                                        W=dropout_layer.W, b=dropout_layer.b,
                                        do_maxout = True, pool_size = self.pool_size)
            # add the layer to our list of layers
            self.layers.append(hidden_layer)
            self.dropout_layers.append(dropout_layer)
            self.params.extend(dropout_layer.params)
            self.delta_params.extend(dropout_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.dropout_logLayer = LogisticRegression(
                                 input=self.dropout_layers[-1].dropout_output,
                                 n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs)

        self.logLayer = LogisticRegression(
                         input=(1 - self.dropout_factor[-1]) * self.layers[-1].output,
                         n_in=self.hidden_layers_sizes[-1], n_out=self.n_outs,
                         W=self.dropout_logLayer.W, b=self.dropout_logLayer.b)

        self.dropout_layers.append(self.dropout_logLayer)
        self.layers.append(self.logLayer)
        self.params.extend(self.dropout_logLayer.params)
        self.delta_params.extend(self.dropout_logLayer.delta_params)

        # compute the cost
        self.finetune_cost = self.dropout_logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            for i in range(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l1_reg * (abs(W).sum())

        if self.l2_reg is not None:
            for i in range(self.hidden_layers_number):
                W = self.layers[i].W
                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
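
This variant reads its entire configuration from a cfg object instead of keyword arguments. A bare-bones stand-in listing exactly the attributes the constructor above touches (a sketch only; the real project presumably ships its own configuration class with more fields):

import theano.tensor as T

class MinimalCfg(object):
    # only the fields read by the __init__ above
    def __init__(self):
        self.n_ins = 440
        self.n_outs = 1947
        self.hidden_layers_sizes = [1024, 1024, 1024]
        self.activation = T.nnet.sigmoid
        self.do_maxout = False
        self.pool_size = 1
        self.input_dropout_factor = 0.0
        self.dropout_factor = [0.2, 0.2, 0.2]
        self.max_col_norm = None
        self.l1_reg = None
        self.l2_reg = None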
Example #35
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 first_layer_gb = True,pretrainedLayers=None,activation=T.nnet.sigmoid):
        """This class is made to support a variable number of layers.

        :type numpy_rng: numpy.random.RandomState
        :param numpy_rng: numpy random number generator used to draw initial
                    weights

        :type theano_rng: theano.tensor.shared_randomstreams.RandomStreams
        :param theano_rng: Theano random generator; if None is given one is
                           generated based on a seed drawn from `rng`

        :type n_ins: int
        :param n_ins: dimension of the input to the DBN

        :type hidden_layers_sizes: list of ints
        :param hidden_layers_sizes: intermediate layers size, must contain
                               at least one value

        :type n_outs: int
        :param n_outs: dimension of the output of the network

        :type first_layer_gb: bool
        :param first_layer_gb: whether the first layer is Gaussian-Bernoulli or
                                Bernoulli-Bernoulli
        """
        super(DBN, self).__init__()
        self.layers = []
        self.rbm_layers = []
        self.n_layers = len(hidden_layers_sizes)

        if pretrainedLayers is None:
            self.nPreTrainLayers = self.n_layers
        else:
            self.nPreTrainLayers = pretrainedLayers

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

        # allocate symbolic variables for the data
        self.x = T.matrix('x')  # the data is presented as rasterized images
        self.y = T.ivector('y')  # the labels are presented as 1D vector
                                 # of [int] labels

        # The DBN is an MLP, for which all weights of intermediate
        # layers are shared with a different RBM.  We will first
        # construct the DBN as a deep multilayer perceptron, and when
        # constructing each sigmoidal layer we also construct an RBM
        # that shares weights with that layer. During pretraining we
        # will train these RBMs (which will lead to changing the
        # weights of the MLP as well). During finetuning we will finish
        # training the DBN by doing stochastic gradient descent on the
        # MLP.

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer

            # the size of the input is either the number of hidden
            # units of the layer below or the input size if we are on
            # the first layer
            # the input to this layer is either the activation of the
            # hidden layer below or the input of the DBN if you are on
            # the first layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output


            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)

            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)

            # the parameters of the sigmoid_layers are parameters of the DBN. 
            # The visible biases in the RBM are parameters of those RBMs, 
            # but not of the DBN.
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

            # Construct an RBM that shared weights with this layer
            # the first layer could be Gaussian-Bernoulli RBM
            # other layers are Bernoulli-Bernoulli RBMs
            if i == 0 and first_layer_gb:
                rbm_layer = GBRBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b)
            else:
                rbm_layer = RBM(numpy_rng=numpy_rng,
                              theano_rng=theano_rng,
                              input=layer_input,
                              n_visible=input_size,
                              n_hidden=hidden_layers_sizes[i],
                              W=sigmoid_layer.W,
                              hbias=sigmoid_layer.b)
            self.rbm_layers.append(rbm_layer)            

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
            input=self.layers[-1].output,
            n_in=hidden_layers_sizes[-1],
            n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        # compute the cost for second phase of training, defined as the
        # negative log likelihood of the logistic regression (output) layer
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        # compute the gradients with respect to the model parameters
        # symbolic variable that points to the number of errors made on the
        # minibatch given by self.x and self.y
        self.errors = self.logLayer.errors(self.y)

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out
Example #36
    def __init__(self, numpy_rng, theano_rng=None,
                 cfg = None,  # the network configuration
                 dnn_shared = None, shared_layers=[], input = None):

        self.cfg = cfg
        self.params = []
        self.delta_params   = []
        self.n_ins = cfg.n_ins; self.n_outs = cfg.n_outs
        self.l1_reg = cfg.l1_reg
        self.l2_reg = cfg.l2_reg
        self.do_maxout = cfg.do_maxout; self.pool_size = cfg.pool_size
        self.max_col_norm = cfg.max_col_norm
        print self.max_col_norm

        self.layers = []
        self.bilayers = []
        self.lstm_layers = []
        self.fc_layers = []

        # 1. lstm
        self.lstm_layers_sizes = cfg.lstm_layers_sizes
        self.lstm_layers_number = len(self.lstm_layers_sizes)
        # 2. dnn
        self.hidden_layers_sizes = cfg.hidden_layers_sizes
        self.hidden_layers_number = len(self.hidden_layers_sizes)
        self.activation = cfg.activation

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        if input is None:
            self.x = T.matrix('x')
        else:
            self.x = input 
        self.y = T.ivector('y')

        #######################
        # build lstm layers   #
        #######################
        print '1. start to build lstm layer: '+ str(self.lstm_layers_number)
        for i in xrange(self.lstm_layers_number):
            if i == 0:
                input_size = self.n_ins
                input = self.x
            else:
                input_size = self.lstm_layers_sizes[i - 1]
                input = self.bilayers[-1].output
            
            # Forward
            f_lstm_layer = LSTMLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.lstm_layers_sizes[i])
            print '\tbuild f_lstm layer: ' + str(input_size) +' x '+ str(f_lstm_layer.n_out)
            self.layers.append(f_lstm_layer)
            self.lstm_layers.append(f_lstm_layer)
            self.params.extend(f_lstm_layer.params)
            self.delta_params.extend(f_lstm_layer.delta_params)

            # Backward
            b_lstm_layer = LSTMLayer(rng=numpy_rng, input=input, n_in=input_size, n_out=self.lstm_layers_sizes[i], backwards=True)
            print '\tbuild b_lstm layer: ' + str(input_size) +' x '+ str(b_lstm_layer.n_out)
            self.layers.append(b_lstm_layer)
            self.lstm_layers.append(b_lstm_layer)
            self.params.extend(b_lstm_layer.params)
            self.delta_params.extend(b_lstm_layer.delta_params)

            # Sum forward + backward
            bi_layer = SUMLayer(finput=f_lstm_layer.output,binput=b_lstm_layer.output[::-1], n_out=self.lstm_layers_sizes[i - 1])
            self.bilayers.append(bi_layer)
            print '\tbuild sum layer: ' + str(input_size) +' x '+ str(bi_layer.n_out)

        print '1. finish lstm layer: '+ str(self.bilayers[-1].n_out)

        #######################
        # build log layers   #
        #######################
        print '3. start to build log layer: 1'
        input_size = self.bilayers[-1].n_out
        input = self.bilayers[-1].output
        logLayer = LogisticRegression(input=input, n_in=input_size, n_out=self.n_outs)
        print '\tbuild final layer: ' + str(input_size) +' x '+ str(self.n_outs)
        self.layers.append(logLayer)
        self.params.extend(logLayer.params)
        self.delta_params.extend(logLayer.delta_params)
        print '3. finish log layer: '+ str(self.bilayers[-1].n_out)
        print 'Total layers: '+ str(len(self.layers))

        sys.stdout.flush()

        self.finetune_cost = logLayer.negative_log_likelihood(self.y)
        self.errors = logLayer.errors(self.y)
Example #37
    def __init__(self, numpy_rng, theano_rng=None, n_ins=784,
                 hidden_layers_sizes=[500, 500], n_outs=10,
                 activation = T.nnet.sigmoid, adv_activation = None,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        super(DNN, self).__init__()
        
        self.layers = []
        self.n_layers = len(hidden_layers_sizes)

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        assert self.n_layers > 0

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        for i in xrange(self.n_layers):
            # construct the sigmoidal layer
            if i == 0:
                input_size = n_ins
                layer_input = self.x
            else:
                input_size = hidden_layers_sizes[i - 1]
                layer_input = self.layers[-1].output

            if adv_activation is not None:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i] * adv_activation['pool_size'],
                                        activation = activation,
                                        adv_activation_method = adv_activation['method'],
                                        pool_size = adv_activation['pool_size'],
                                        pnorm_order = adv_activation['pnorm_order'])
            else:
                sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=activation)
                                        
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
            
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.layers[-1].output,
                         n_in=hidden_layers_sizes[-1], n_out=n_outs)

        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)

        if self.l1_reg is not None:
            self.__l1Regularization__()

        if self.l2_reg is not None:
            self.__l2Regularization__()

        self.output = self.logLayer.prediction()
        self.features = self.layers[-2].output
        self.features_dim = self.layers[-2].n_out
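
adv_activation is consumed above as a plain dict with 'method', 'pool_size' and 'pnorm_order' keys. A hedged construction example; the class name DNN comes from the super() call above, while the accepted values of 'method' are an assumption inferred from the keyword names passed to HiddenLayer:

import numpy

numpy_rng = numpy.random.RandomState(4242)

adv_activation = {'method': 'maxout',    # forwarded as adv_activation_method
                  'pool_size': 3,        # used both for n_out scaling and pooling
                  'pnorm_order': 1}

net = DNN(numpy_rng=numpy_rng, n_ins=440,
          hidden_layers_sizes=[1024, 1024], n_outs=1947,
          activation=(lambda x: 1.0 * x),   # linear pre-activation, as in the maxout branches elsewhere
          adv_activation=adv_activation)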
Example #38
    def __init__(self, numpy_rng, theano_rng=None,
                 upper_hidden_layers_sizes=[500, 500], n_outs=10,
                 tower1_hidden_layers_sizes=[500, 500], tower1_n_ins = 100,
                 tower2_hidden_layers_sizes=[500, 500], tower2_n_ins = 100,
                 activation = T.nnet.sigmoid,
                 do_maxout = False, pool_size = 1, 
                 do_pnorm = False, pnorm_order = 1,
                 max_col_norm = None, l1_reg = None, l2_reg = None):

        self.tower1_layers = []
        self.tower2_layers = []
        self.upper_layers = []

        self.params = []
        self.delta_params   = []

        self.max_col_norm = max_col_norm
        self.l1_reg = l1_reg
        self.l2_reg = l2_reg

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x') 
        self.y = T.ivector('y')

        self.tower1_input = self.x[:,0:tower1_n_ins]
        self.tower2_input = self.x[:,tower1_n_ins:(tower1_n_ins + tower2_n_ins)]

        # build tower1
        for i in xrange(len(tower1_hidden_layers_sizes)):
            if i == 0:
                input_size = tower1_n_ins
                layer_input = self.tower1_input
            else:
                input_size = tower1_hidden_layers_sizes[i - 1]
                layer_input = self.tower1_layers[-1].output 

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower1_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower1_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        # build tower2
        for i in xrange(len(tower2_hidden_layers_sizes)):
            if i == 0:
                input_size = tower2_n_ins
                layer_input = self.tower2_input
            else:
                input_size = tower2_hidden_layers_sizes[i - 1]
                layer_input = self.tower2_layers[-1].output     

            layer = HiddenLayer(rng=numpy_rng,
                                input=layer_input,
                                n_in=input_size,
                                n_out=tower2_hidden_layers_sizes[i],
                                activation=T.nnet.sigmoid)
            # add the layer to our list of layers
            self.tower2_layers.append(layer)
            self.params.extend(layer.params)
            self.delta_params.extend(layer.delta_params)

        for i in xrange(len(upper_hidden_layers_sizes)):
            # construct the sigmoidal layer
            if i == 0:
                input_size = tower1_hidden_layers_sizes[-1] + tower2_hidden_layers_sizes[-1]
                layer_input = T.concatenate([self.tower1_layers[-1].output, self.tower2_layers[-1].output], axis=1)
            else:
                input_size = upper_hidden_layers_sizes[i - 1]
                layer_input = self.upper_layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=upper_hidden_layers_sizes[i],
                                        activation=activation)
            # add the layer to our list of layers
            self.upper_layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)
        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                         input=self.upper_layers[-1].output,
                         n_in=upper_hidden_layers_sizes[-1], n_out=n_outs)

        self.upper_layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)
       
        # construct a function that implements one step of finetuning

        # compute the cost for second phase of training,
        # defined as the negative log likelihood
        self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
        self.errors = self.logLayer.errors(self.y)
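
The two towers above split one concatenated feature matrix column-wise: the first tower1_n_ins columns feed tower 1 and the next tower2_n_ins columns feed tower 2, before the tower outputs are concatenated again for the upper layers. A small numpy illustration of that slicing:

import numpy

tower1_n_ins, tower2_n_ins = 3, 2
x = numpy.arange(10).reshape(2, 5)        # a minibatch of 2 frames with 3 + 2 features

tower1_input = x[:, 0:tower1_n_ins]                                 # columns 0..2
tower2_input = x[:, tower1_n_ins:tower1_n_ins + tower2_n_ins]       # columns 3..4
print tower1_input.shape, tower2_input.shape                        # (2, 3) (2, 2)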
Example #39
class CNN(object):

    def __init__(self, numpy_rng, theano_rng=None,
                 batch_size = 256, n_outs=500,
                 sparsity = None, sparsity_weight = None, sparse_layer = 3,
                 conv_layer_configs = [],
                 hidden_layers_sizes=[500, 500],
                 conv_activation = T.nnet.sigmoid,
                 full_activation = T.nnet.sigmoid,
                 use_fast = False):

        self.layers = []
        self.params = []
        self.delta_params   = []

        self.sparsity = sparsity
        self.sparsity_weight = sparsity_weight
        self.sparse_layer = sparse_layer

        if not theano_rng:
            theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))
        # allocate symbolic variables for the data
        self.x = T.matrix('x')  
        self.y = T.ivector('y') 
        
        self.conv_layer_num = len(conv_layer_configs)
        self.full_layer_num = len(hidden_layers_sizes)

        for i in xrange(self.conv_layer_num):
            if i == 0:
                input = self.x
                is_input_layer = True
            else:
                input = self.layers[-1].output
                is_input_layer = False
            config = conv_layer_configs[i]
            conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input, is_input_layer = is_input_layer,
                        input_shape = config['input_shape'], filter_shape = config['filter_shape'], poolsize = config['poolsize'],
                        activation = conv_activation, flatten = config['flatten'])
            self.layers.append(conv_layer)
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

        self.conv_output_dim = config['output_shape'][1] * config['output_shape'][2] * config['output_shape'][3]

        for i in xrange(self.full_layer_num):
            # construct the sigmoidal layer
            if i == 0:
                input_size = self.conv_output_dim
            else:
                input_size = hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

            sigmoid_layer = HiddenLayer(rng=numpy_rng,
                                        input=layer_input,
                                        n_in=input_size,
                                        n_out=hidden_layers_sizes[i],
                                        activation=full_activation)
            # add the layer to our list of layers
            self.layers.append(sigmoid_layer)
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

        # We now need to add a logistic layer on top of the MLP
        self.logLayer = LogisticRegression(
                               input=self.layers[-1].output,
                               n_in=hidden_layers_sizes[-1], n_out=n_outs)
        self.layers.append(self.logLayer)
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

        if self.sparsity_weight is not None:
            sparsity_level = T.extra_ops.repeat(self.sparsity, 630)
            # this class keeps all of its layers in self.layers (there is no
            # separate sigmoid_layers list), so index the sparse layer there
            avg_act = self.layers[sparse_layer].output.mean(axis=0)
            kl_div = self.kl_divergence(sparsity_level, avg_act)
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y) + self.sparsity_weight * kl_div.sum()
        else:
            self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)

        self.errors = self.logLayer.errors(self.y)

    def kl_divergence(self, p, p_hat):
        return p * T.log(p / p_hat) + (1 - p) * T.log((1 - p) / (1 - p_hat))

    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size):

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        index = T.lscalar('index')  # index to a [mini]batch
        learning_rate = T.fscalar('learning_rate')
        momentum = T.fscalar('momentum')

        # compute the gradients with respect to the model parameters
        gparams = T.grad(self.finetune_cost, self.params)

        # compute list of fine-tuning updates
        updates = collections.OrderedDict()
        for dparam, gparam in zip(self.delta_params, gparams):
            updates[dparam] = momentum * dparam - gparam*learning_rate
        for dparam, param in zip(self.delta_params, self.params):
            updates[param] = param + updates[dparam]

        train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default = 0.0001),
              theano.Param(momentum, default = 0.5)],
              outputs=self.errors,
              updates=updates,
              givens={
                self.x: train_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: train_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        valid_fn = theano.function(inputs=[index],
              outputs=self.errors,
              givens={
                self.x: valid_set_x[index * batch_size:
                                    (index + 1) * batch_size],
                self.y: valid_set_y[index * batch_size:
                                    (index + 1) * batch_size]})

        return train_fn, valid_fn
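
The optional sparsity term above penalises the mean activation of one hidden layer with the Bernoulli KL divergence kl_divergence(p, p_hat) = p*log(p/p_hat) + (1-p)*log((1-p)/(1-p_hat)). A quick numpy check of how that penalty behaves (the target value 0.05 is only an illustration):

import numpy

def kl_divergence(p, p_hat):
    # same formula as CNN.kl_divergence, evaluated on numpy arrays
    return p * numpy.log(p / p_hat) + (1 - p) * numpy.log((1 - p) / (1 - p_hat))

p = 0.05                                   # target sparsity level
avg_act = numpy.array([0.05, 0.2, 0.5])    # mean activations of three hidden units
print kl_divergence(p, avg_act)            # approx. [0.000, 0.094, 0.495]: zero at the target, growing as units drift from it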