Example #1
    def build_finetune_functions(self,
                                 train_shared_xy,
                                 valid_shared_xy,
                                 use_lhuc=False,
                                 layer_index=0):
        """ This function is to build finetune functions and to update gradients

        :param train_shared_xy: theano shared variable for input and output training data
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development

        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

        ## added for LHUC
        if use_lhuc:
            # For LHUC, only the scaling parameters (named 'c') are adapted
            self.lhuc_params = []
            for p in self.params:
                if p.name == 'c':
                    self.lhuc_params.append(p)
            params = self.lhuc_params
            gparams = T.grad(cost, params)
        else:
            params = self.params
            gparams = T.grad(cost, params)

        freeze_params = 0
        for layer in range(layer_index):
            freeze_params += len(self.rnn_layers[layer].params)

        # use optimizer
        if self.optimizer == 'sgd':
            # zip pairs each parameter with its corresponding gradient
            updates = OrderedDict()

            for i, (param, gparam) in enumerate(zip(params, gparams)):
                weight_update = self.updates[param]
                upd = mom * weight_update - lr * gparam
                updates[weight_update] = upd

                # parameters in the first `layer_index` layers stay frozen; only the rest are updated
                if i >= freeze_params:
                    updates[param] = param + upd

        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self,
                                                  gparams,
                                                  learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical(
                "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n"
                % (self.optimizer))
            sys.exit(1)

        train_model = theano.function(
            inputs=[lr, mom],  # index, batch_size
            outputs=self.errors,
            updates=updates,
            givens={
                self.x:
                train_set_x,  # [index*batch_size:(index + 1)*batch_size]
                self.y: train_set_y,
                self.is_train: np.cast['int32'](1)
            },
            on_unused_input='ignore')

        valid_model = theano.function(inputs=[],
                                      outputs=self.errors,
                                      givens={
                                          self.x: valid_set_x,
                                          self.y: valid_set_y,
                                          self.is_train: np.cast['int32'](0)
                                      },
                                      on_unused_input='ignore')

        return train_model, valid_model
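
The SGD branch above keeps one velocity per parameter (`self.updates[param]`), always refreshes the velocity, and skips the weight update for parameters belonging to the first `layer_index` layers. A minimal NumPy sketch of that update rule, outside Theano and with hypothetical names (this is an illustration, not the Merlin implementation), might look like this:

    import numpy as np

    def sgd_momentum_step(params, grads, velocities, lr, mom, freeze_params=0):
        """One SGD-with-momentum step; the first `freeze_params` parameters stay frozen."""
        for i, (p, g, v) in enumerate(zip(params, grads, velocities)):
            v[...] = mom * v - lr * g      # velocity is always refreshed
            if i >= freeze_params:         # frozen parameters keep their values
                p[...] = p + v

    # toy usage: two parameters, the first one frozen
    params = [np.array([1.0]), np.array([2.0])]
    grads = [np.array([0.5]), np.array([0.5])]
    velocities = [np.zeros(1), np.zeros(1)]
    sgd_momentum_step(params, grads, velocities, lr=0.1, mom=0.9, freeze_params=1)
    print(params)  # [array([1.]), array([1.95])] -- only the second parameter moved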
Example #2
    def build_finetune_functions(self, train_shared_xy, valid_shared_xy):
        """ This function is to build finetune functions and to update gradients

        :param train_shared_xy: theano shared variable for input and output training data
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development

        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  #+ self.L2_reg * self.L2_sqr

        gparams = T.grad(cost, self.params)

        # use optimizer
        if self.optimizer == 'sgd':
            # zip pairs each parameter with its corresponding gradient
            updates = OrderedDict()

            for param, gparam in zip(self.params, gparams):
                weight_update = self.updates[param]
                upd = mom * weight_update - lr * gparam
                updates[weight_update] = upd
                updates[param] = param + upd
        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self,
                                                  gparams,
                                                  learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical(
                "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n"
                % (self.optimizer))
            sys.exit(1)

        train_model = theano.function(
            inputs=[lr, mom],  #index, batch_size
            outputs=self.errors,
            updates=updates,
            givens={
                self.x:
                train_set_x,  #[index*batch_size:(index + 1)*batch_size]
                self.y: train_set_y,
                self.is_train: np.cast['int32'](1)
            },
            on_unused_input='ignore')

        valid_model = theano.function(inputs=[],
                                      outputs=self.errors,
                                      givens={
                                          self.x: valid_set_x,
                                          self.y: valid_set_y,
                                          self.is_train: np.cast['int32'](0)
                                      },
                                      on_unused_input='ignore')

        return train_model, valid_model
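
The compiled `train_model` takes the learning rate and momentum as its only inputs and returns `self.errors` evaluated over the whole shared training set, while `valid_model` takes no inputs at all. A hypothetical driver loop (the names `model`, `train_shared_xy` and `valid_shared_xy` are assumptions, not part of the snippet above) could use the pair like this:

    # `model` is assumed to be an already-built network exposing this method,
    # with the shared training/validation data prepared elsewhere in the script.
    train_fn, valid_fn = model.build_finetune_functions(train_shared_xy, valid_shared_xy)

    learning_rate, momentum = 0.002, 0.9
    for epoch in range(10):
        train_error = train_fn(learning_rate, momentum)  # one full-batch update on the shared training data
        valid_error = valid_fn()                         # validation needs no lr/momentum inputs
        print('epoch {}: train {}  valid {}'.format(epoch + 1, train_error, valid_error))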
Example #3
    def build_finetune_functions_S2SPF(self, train_shared_xydf, valid_shared_xydf, layer_index=6):
        """ This function is to build finetune functions and to update gradients
        
        :param train_shared_xy: theano shared variable for input and output training data 
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development
        
        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y, train_set_d, train_set_f) = train_shared_xydf
        (valid_set_x, valid_set_y, valid_set_d, valid_set_f) = valid_shared_xydf

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

        params = self.params
        gparams = T.grad(cost, params)

        encoder_params = 0
        for layer in range(layer_index):
            encoder_params += len(self.rnn_layers[layer].params)

        # use optimizer
        if self.optimizer == 'sgd':
            # zip pairs each parameter with its corresponding gradient
            updates = OrderedDict()

            for i, (param, gparam) in enumerate(zip(params, gparams)):
                weight_update = self.updates[param]
                if i >= encoder_params:
                    upd = mom * weight_update - lr * gparam
                else:
                    upd = mom * weight_update - (lr*2) * gparam
                updates[weight_update] = upd
                updates[param] = param + upd

        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical("This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
            sys.exit(1)


        train_model = theano.function(inputs = [lr, mom],  
                                      outputs = self.errors,
                                      updates = updates,
                                      givens = {self.x: train_set_x, 
                                                self.y: train_set_y,
                                                self.d: train_set_d,
                                                self.f: train_set_f,
                                                self.is_train: np.cast['int32'](1)}, on_unused_input='ignore')


        valid_model = theano.function(inputs = [],
                                      outputs = self.errors,
                                      givens = {self.x: valid_set_x,
                                                self.y: valid_set_y,
                                                self.d: valid_set_d,
                                                self.f: valid_set_f,
                                                self.is_train: np.cast['int32'](0)}, on_unused_input='ignore')
 
        return train_model, valid_model
Example #4
    def build_finetune_functions_S2SPF(self,
                                       train_shared_xydf,
                                       valid_shared_xydf,
                                       layer_index=6):
        """ This function is to build finetune functions and to update gradients
        
        :param train_shared_xy: theano shared variable for input and output training data 
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development
        
        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y, train_set_d,
         train_set_f) = train_shared_xydf
        (valid_set_x, valid_set_y, valid_set_d,
         valid_set_f) = valid_shared_xydf

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

        params = self.params
        gparams = T.grad(cost, params)

        encoder_params = 0
        for layer in range(layer_index):
            encoder_params += len(self.rnn_layers[layer].params)

        # use optimizer
        if self.optimizer == 'sgd':
            # zip pairs each parameter with its corresponding gradient
            updates = OrderedDict()

            for i, (param, gparam) in enumerate(zip(params, gparams)):
                weight_update = self.updates[param]
                if i >= encoder_params:
                    upd = mom * weight_update - lr * gparam
                else:
                    # encoder layers (the first `layer_index` layers) use a doubled learning rate
                    upd = mom * weight_update - (lr * 2) * gparam
                updates[weight_update] = upd
                updates[param] = param + upd

        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self,
                                                  gparams,
                                                  learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical(
                "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n"
                % (self.optimizer))
            sys.exit(1)

        train_model = theano.function(inputs=[lr, mom],
                                      outputs=self.errors,
                                      updates=updates,
                                      givens={
                                          self.x: train_set_x,
                                          self.y: train_set_y,
                                          self.d: train_set_d,
                                          self.f: train_set_f,
                                          self.is_train: np.cast['int32'](1)
                                      },
                                      on_unused_input='ignore')

        valid_model = theano.function(inputs=[],
                                      outputs=self.errors,
                                      givens={
                                          self.x: valid_set_x,
                                          self.y: valid_set_y,
                                          self.d: valid_set_d,
                                          self.f: valid_set_f,
                                          self.is_train: np.cast['int32'](0)
                                      },
                                      on_unused_input='ignore')

        return train_model, valid_model
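
In both S2SPF variants the parameters of the first `layer_index` layers (the encoder) are updated with a doubled learning rate, while the remaining parameters use the base rate. A minimal NumPy sketch of that grouped update, with hypothetical names and outside Theano, could read:

    import numpy as np

    def sgd_momentum_step_grouped(params, grads, velocities, lr, mom, encoder_params):
        """One momentum step where the first `encoder_params` parameters use 2 * lr."""
        for i, (p, g, v) in enumerate(zip(params, grads, velocities)):
            rate = lr if i >= encoder_params else lr * 2.0  # doubled rate for encoder parameters
            v[...] = mom * v - rate * g
            p[...] = p + v

    # toy usage: the first (encoder) parameter moves twice as far as the second
    params = [np.array([1.0]), np.array([1.0])]
    grads = [np.array([0.5]), np.array([0.5])]
    velocities = [np.zeros(1), np.zeros(1)]
    sgd_momentum_step_grouped(params, grads, velocities, lr=0.1, mom=0.9, encoder_params=1)
    print(params)  # [array([0.9]), array([0.95])]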
Example #5
    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0):
        """ This function is to build finetune functions and to update gradients

        :param train_shared_xy: theano shared variable for input and output training data
        :type train_shared_xy: tuple of shared variable
        :param valid_shared_xy: theano shared variable for input and output development data
        :type valid_shared_xy: tuple of shared variable
        :returns: finetune functions for training and development

        """

        logger = logging.getLogger("DNN initialization")

        (train_set_x, train_set_y) = train_shared_xy
        (valid_set_x, valid_set_y) = valid_shared_xy

        lr = T.scalar('lr', dtype=theano.config.floatX)
        mom = T.scalar('mom', dtype=theano.config.floatX)  # momentum

        cost = self.finetune_cost  # + self.L2_reg * self.L2_sqr

        ## added for LHUC
        if use_lhuc:
            # For LHUC, only the scaling parameters (named 'c') are adapted
            self.lhuc_params = []
            for p in self.params:
                if p.name == 'c':
                    self.lhuc_params.append(p)
            params = self.lhuc_params
            gparams = T.grad(cost, params)
        else:
            params = self.params
            gparams = T.grad(cost, params)


        freeze_params = 0
        for layer in range(layer_index):
            freeze_params += len(self.rnn_layers[layer].params)

        # use optimizer
        if self.optimizer == 'sgd':
            # zip pairs each parameter with its corresponding gradient
            updates = OrderedDict()

            for i, (param, gparam) in enumerate(zip(params, gparams)):
                weight_update = self.updates[param]
                upd = mom * weight_update - lr * gparam
                updates[weight_update] = upd

                # parameters in the first `layer_index` layers stay frozen; only the rest are updated
                if i >= freeze_params:
                    updates[param] = param + upd

        elif self.optimizer == 'adam':
            updates = compile_ADAM_train_function(self, gparams, learning_rate=lr)
        elif self.optimizer == 'rprop':
            updates = compile_RPROP_train_function(self, gparams)
        else:
            logger.critical("This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer))
            sys.exit(1)

        train_model = theano.function(inputs = [lr, mom],  #index, batch_size
                                      outputs = self.errors,
                                      updates = updates,
                                      givens = {self.x: train_set_x, #[index*batch_size:(index + 1)*batch_size]
                                                self.y: train_set_y,
                                                self.is_train: np.cast['int32'](1)}, on_unused_input='ignore')


        valid_model = theano.function(inputs = [],
                                      outputs = self.errors,
                                      givens = {self.x: valid_set_x,
                                                self.y: valid_set_y,
                                                self.is_train: np.cast['int32'](0)}, on_unused_input='ignore')

        return train_model, valid_model