Example #1
	def modeling(self, l=[2121, 100, 50, 10, 1]):
		"""
		Generate a model with the give number of layers.
		Previously, always a 5 layer model is generated. 
		Now it is changed to make adaptive number of layers. 
		If l = [2121, 1], it is linear regressoin method. 

		- l2_param is a self parameter rather than an input parameter.
		"""
		l2_param = self.l2_param

		model = Sequential()
		# Apply the L2 penalty on the layer itself; assigning to
		# model.regularizers has no effect in Keras 2.
		model.add(Dense(l[1], input_shape=(l[0],), kernel_regularizer=l2(l2_param)))

		for n_w_l in l[2:]:
			model.add(Activation('relu'))
			#model.add(Dropout(0.4))
			# model.add(Dense( n_w_l, W_regularizer = l2(.01)))
			model.add(Dense( n_w_l))
	 
		return model
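The method wires the L2 penalty into the first layer and sizes the rest of the network from the list l. A minimal standalone sketch of the same idea, assuming the Keras 2 API (build_mlp and the calls below are illustrative, not part of the original class):

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.regularizers import l2

def build_mlp(layer_sizes, l2_param=1e-3):
    # layer_sizes = [n_inputs, n_hidden_1, ..., n_outputs]
    model = Sequential()
    # The first Dense carries the input shape and the L2 weight penalty.
    model.add(Dense(layer_sizes[1], input_shape=(layer_sizes[0],),
                    kernel_regularizer=l2(l2_param)))
    for width in layer_sizes[2:]:
        model.add(Activation('relu'))
        model.add(Dense(width))
    return model

mlp = build_mlp([2121, 100, 50, 10, 1])  # three hidden layers, scalar output
linreg = build_mlp([2121, 1])            # loop body never runs: linear regression

With l = [2121, 1] the loop never executes, so the model is a single Dense layer, which is the linear regression case the docstring mentions.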
Example #2
    def trainLayer(self,
                   data=None,
                   trgt=None,
                   ifold=0,
                   hidden_neurons=[400],
                   layer=1,
                   regularizer=None,
                   regularizer_param=None):
        '''
            Method used to perform the layerwise algorithm to train the SAE
        '''
        # Change elements equal to zero to one
        for i in range(len(hidden_neurons)):
            if hidden_neurons[i] == 0:
                hidden_neurons[i] = 1
        if (layer <= 0) or (layer > len(hidden_neurons)):
            print "[-] Error: The parameter layer must be greater than zero and less or equal to the length of list hidden_neurons"
            return -1

        if self.trn_params.params['verbose']:
            print '[+] Using %s as optimizer algorithm' % self.trn_params.params[
                'optmizerAlgorithm']

        neurons_str = self.getNeuronsString(data, hidden_neurons[:layer])

        if regularizer is not None and len(regularizer) != 0:
            model_str = '%s/%s/%s_%i_folds_%s_%s_neurons_%s_regularizer(%f)' % (
                self.save_path, self.analysis_str, self.prefix_str,
                self.n_folds, self.params_str, neurons_str, regularizer,
                regularizer_param)
        else:
            model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
                self.save_path, self.analysis_str, self.prefix_str,
                self.n_folds, self.params_str, neurons_str)
        if not self.development_flag:
            file_name = '%s_fold_%i_model.h5' % (model_str, ifold)
            if os.path.exists(file_name):
                if self.trn_params.params['verbose']:
                    print 'File %s exists' % (file_name)
                # load model
                file_name = '%s_fold_%i_model.h5' % (model_str, ifold)
                classifier = load_model(
                    file_name,
                    custom_objects={
                        '%s' % self.trn_params.params['loss']:
                        self.lossFunction
                    })
                file_name = '%s_fold_%i_trn_desc.jbl' % (model_str, ifold)
                trn_desc = joblib.load(file_name)
                return ifold, classifier, trn_desc
        else:
            file_name = '%s_fold_%i_model_dev.h5' % (model_str, ifold)
            if os.path.exists(file_name):
                if self.trn_params.params['verbose']:
                    print 'File %s exists' % (file_name)
                # load model
                file_name = '%s_fold_%i_model_dev.h5' % (model_str, ifold)
                classifier = load_model(
                    file_name,
                    custom_objects={
                        '%s' % self.trn_params.params['loss']:
                        self.lossFunction
                    })
                file_name = '%s_fold_%i_trn_desc_dev.jbl' % (model_str, ifold)
                trn_desc = joblib.load(file_name)
                return ifold, classifier, trn_desc

        norm_data = self.normalizeData(data, ifold)

        best_init = 0
        best_loss = 10e6

        classifier = []
        trn_desc = {}

        for i_init in range(self.n_inits):
            model = Sequential()
            proj_all_data = norm_data
            if layer == 1:

                model.add(
                    Dense(units=hidden_neurons[layer - 1],
                          input_dim=data.shape[1],
                          kernel_initializer="uniform"))
                model.add(
                    Activation(self.trn_params.params['hidden_activation']))
                model.add(
                    Dense(units=data.shape[1],
                          input_dim=hidden_neurons[layer - 1],
                          kernel_initializer="uniform"))
                model.add(
                    Activation(self.trn_params.params['output_activation']))
            elif layer > 1:
                for ilayer in range(1, layer):
                    neurons_str = self.getNeuronsString(
                        data, hidden_neurons[:ilayer])
                    if regularizer is not None and len(regularizer) != 0:
                        previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons_%s_regularizer(%f)' % (
                            self.save_path, self.analysis_str, self.prefix_str,
                            self.n_folds, self.params_str, neurons_str,
                            regularizer, regularizer_param)
                    else:
                        previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
                            self.save_path, self.analysis_str, self.prefix_str,
                            self.n_folds, self.params_str, neurons_str)
                    if not self.development_flag:
                        file_name = '%s_fold_%i_model.h5' % (
                            previous_model_str, ifold)
                    else:
                        file_name = '%s_fold_%i_model_dev.h5' % (
                            previous_model_str, ifold)

                    # Check if previous layer model was trained
                    if not os.path.exists(file_name):
                        self.trainLayer(data=data,
                                        trgt=trgt,
                                        ifold=ifold,
                                        hidden_neurons=hidden_neurons[:ilayer],
                                        layer=ilayer,
                                        regularizer=regularizer,
                                        regularizer_param=regularizer_param)

                    layer_model = load_model(
                        file_name,
                        custom_objects={
                            '%s' % self.trn_params.params['loss']:
                            self.lossFunction
                        })

                    get_layer_output = K.function(
                        [layer_model.layers[0].input],
                        [layer_model.layers[1].output])
                    # Projection of layer
                    proj_all_data = get_layer_output([proj_all_data])[0]

                # Build the optional weight penalty up front; assigning to
                # model.regularizers (as the old code did) has no effect in
                # Keras 2, so l1/l2 must go into the Dense layer itself.
                kernel_reg = None
                if regularizer == "l1":
                    kernel_reg = l1(regularizer_param)
                elif regularizer == "l2":
                    kernel_reg = l2(regularizer_param)
                model.add(
                    Dense(units=hidden_neurons[layer - 1],
                          input_dim=proj_all_data.shape[1],
                          kernel_initializer="uniform",
                          kernel_regularizer=kernel_reg))
                model.add(
                    Activation(self.trn_params.params['hidden_activation']))
                if regularizer == "dropout":
                    model.add(Dropout(regularizer_param))
                model.add(
                    Dense(units=proj_all_data.shape[1],
                          input_dim=hidden_neurons[layer - 1],
                          kernel_initializer="uniform"))
                model.add(
                    Activation(self.trn_params.params['output_activation']))
                norm_data = proj_all_data

            # end of elif layer > 1:

            print 'Layer: %i - Neuron: %i - Fold %i of %i Folds -  Init %i of %i Inits' % (
                layer, hidden_neurons[layer - 1], ifold + 1, self.n_folds,
                i_init + 1, self.n_inits)

            model.compile(loss=self.lossFunction,
                          optimizer=self.optmizer,
                          metrics=self.trn_params.params['metrics'])
            # Train model
            earlyStopping = callbacks.EarlyStopping(
                monitor='val_loss',
                patience=self.trn_params.params['patience'],
                verbose=self.trn_params.params['train_verbose'],
                mode='auto')

            init_trn_desc = model.fit(
                norm_data,
                norm_data,
                epochs=self.trn_params.params['n_epochs'],
                batch_size=self.trn_params.params['batch_size'],
                callbacks=[earlyStopping],
                verbose=self.trn_params.params['verbose'],
                validation_split=0.15,
                shuffle=True)
            if np.min(init_trn_desc.history['val_loss']) < best_loss:
                best_init = i_init
                best_loss = np.min(init_trn_desc.history['val_loss'])
                classifier = model
                trn_desc['epochs'] = init_trn_desc.epoch

                for imetric in range(len(self.trn_params.params['metrics'])):
                    if self.trn_params.params['metrics'][
                            imetric] == 'accuracy':
                        metric = 'acc'
                    else:
                        metric = self.trn_params.params['metrics'][imetric]
                    trn_desc[metric] = init_trn_desc.history[metric]
                    trn_desc['val_' + metric] = init_trn_desc.history['val_' +
                                                                      metric]

                trn_desc['loss'] = init_trn_desc.history['loss']
                trn_desc['val_loss'] = init_trn_desc.history['val_loss']

        # save model
        if not self.development_flag:
            file_name = '%s_fold_%i_model.h5' % (model_str, ifold)
            classifier.save(file_name)
            file_name = '%s_fold_%i_trn_desc.jbl' % (model_str, ifold)
            joblib.dump([trn_desc], file_name, compress=9)
        else:
            file_name = '%s_fold_%i_model_dev.h5' % (model_str, ifold)
            classifier.save(file_name)
            file_name = '%s_fold_%i_trn_desc_dev.jbl' % (model_str, ifold)
            joblib.dump([trn_desc], file_name, compress=9)
        return ifold, classifier, trn_desc
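Stripped of the fold bookkeeping and model caching, trainLayer() is greedy layerwise pretraining: train an autoencoder, project the data through its encoder half with K.function, then train the next autoencoder on that projection. A minimal sketch of the core loop, assuming a TensorFlow-1-style Keras 2 backend (sizes, activations, and the optimizer below are illustrative):

import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Activation
from keras import backend as K

def train_autoencoder(x, n_hidden, epochs=10):
    ae = Sequential()
    ae.add(Dense(n_hidden, input_dim=x.shape[1], kernel_initializer='uniform'))
    ae.add(Activation('tanh'))
    ae.add(Dense(x.shape[1], kernel_initializer='uniform'))
    ae.add(Activation('linear'))
    ae.compile(loss='mse', optimizer='adam')
    ae.fit(x, x, epochs=epochs, batch_size=128,
           validation_split=0.15, shuffle=True, verbose=0)
    return ae

x = np.random.rand(1000, 400).astype('float32')
proj = x
for n_hidden in [400, 200, 100]:  # one autoencoder per stacked layer
    ae = train_autoencoder(proj, n_hidden)
    # Same trick as the K.function call above: evaluate the output of the
    # encoding half (layers[1] is its Activation) to get the projection.
    encode = K.function([ae.layers[0].input], [ae.layers[1].output])
    proj = encode([proj])[0]

On top of this loop, the method retrains each layer n_inits times and keeps the initialization with the lowest validation loss, which is what the best_loss / classifier bookkeeping above implements.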
Example #3
    def trainLayer(self,
                   data=None,
                   trgt=None,
                   transformed_data=None,
                   ifold=0,
                   hidden_neurons=[80],
                   layer=1,
                   folds_sweep=False,
                   regularizer=None,
                   regularizer_param=None,
                   sort=999,
                   etBinIdx=999,
                   etaBinIdx=999,
                   tuning_folder=None):
        # Change elements equal to zero to one
        for i in range(len(hidden_neurons)):
            if hidden_neurons[i] == 0:
                hidden_neurons[i] = 1
        if (layer <= 0) or (layer > len(hidden_neurons)):
            print "[-] Error: The parameter layer must be greater than zero and less or equal to the length of list hidden_neurons"
            return -1

        if self.trn_params.params['verbose']:
            print '[+] Using %s as optimizer algorithm' % self.trn_params.params[
                'optmizerAlgorithm']

        neurons_str = self.getNeuronsString(data, hidden_neurons[:layer])

        # Both branches built the same string here, so the regularizer no
        # longer alters the save path.
        model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
            self.save_path, self.analysis_str, self.prefix_str,
            self.n_folds, self.params_str, neurons_str)
        if not self.development_flag:
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            #file_name = '%s_fold_%i_model.h5'%(model_str,ifold)
            if os.path.exists(file_name):
                custom_obj = {}
                if self._aetype == 'contractive':
                    from TuningTools.MetricsLosses import contractive_loss
                    custom_obj['contractive_loss'] = contractive_loss(
                        hidden_neurons[layer - 1], data.shape[1],
                        self.trn_params.params['hidden_activation'],
                        self.trn_params.params['output_activation'])
                else:
                    custom_obj[
                        self.trn_params.params['loss']] = self.lossFunction

                if self.trn_params.params['verbose']:
                    print 'File %s exists' % (file_name)
                # load model
                file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
                    model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
                #file_name  = '%s_fold_%i_model.h5'%(model_str,ifold)
                classifier = load_model(file_name, custom_objects=custom_obj)
                file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_trn_desc.jbl' % (
                    model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
                trn_desc = joblib.load(file_name)

                file_name_prefix = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i' % (
                    model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
                # The with-statement already closes the file on exit.
                with open(self.save_path + tuning_folder, 'a+') as t_file:
                    t_file.write(file_name_prefix + "\n")
                return ifold, classifier, trn_desc
        else:
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
                model_str, sort, etBinIdx, etaBinIdx)
            #file_name = '%s_fold_%i_model_dev.h5'%(model_str,ifold)
            if os.path.exists(file_name):
                if self.trn_params.params['verbose']:
                    print 'File %s exists' % (file_name)
                # load model
                file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
                    model_str, sort, etBinIdx, etaBinIdx)
                #file_name  = '%s_fold_%i_model_dev.h5'%(model_str,ifold)
                classifier = load_model(
                    file_name,
                    custom_objects={
                        '%s' % self.trn_params.params['loss']:
                        self.lossFunction
                    })
                file_name = '%s_sort_%i_etbin_%i_etabin_%i_trn_desc_dev.jbl' % (
                    model_str, sort, etBinIdx, etaBinIdx)
                trn_desc = joblib.load(file_name)
                return ifold, classifier, trn_desc

        #train_id, test_id = self.CVO[ifold]

        #norm_data = self.normalizeData(data, ifold)
        #norm_data = data

        best_init = 0
        best_loss = 9999999

        classifier = []
        trn_desc = {}

        print 'Number of SAE training inits: ' + str(self.n_inits)
        for i_init in range(self.n_inits):
            print 'Layer: %i - Neuron: %i - Fold %i of %i Folds -  Init %i of %i Inits' % (
                self._layerNumber, hidden_neurons[layer - 1], ifold + 1,
                self.n_folds, i_init + 1, self.n_inits)
            model = Sequential()
            #proj_all_data = norm_data
            proj_all_data = data
            if layer == 1:
                print 'LAYER 1'
                print hidden_neurons[layer - 1], data.shape[1]
                if regularizer == 'l1':
                    model.add(
                        Dense(hidden_neurons[layer - 1],
                              input_dim=data.shape[1],
                              activity_regularizer=regularizers.l1(
                                  regularizer_param),
                              name='encoded'))
                else:
                    model.add(
                        Dense(hidden_neurons[layer - 1],
                              input_dim=data.shape[1],
                              name='encoded'))
                model.add(
                    Activation(self.trn_params.params['hidden_activation']))
                if regularizer == "dropout":
                    model.add(Dropout(regularizer_param))
                #elif regularizer == "l1":
                #    model.regularizers = [l1(regularizer_param)]
                #elif regularizer == "l2":
                #    model.regularizers = [l2(regularizer_param)]
                model.add(Dense(data.shape[1]))
                model.add(
                    Activation(self.trn_params.params['output_activation']))
            elif layer > 1:
                for ilayer in range(1, layer):
                    print hidden_neurons[:ilayer]
                    neurons_str = self.getNeuronsString(
                        data, hidden_neurons[:ilayer])
                    previous_model_str = '%s/%s/%s_%i_folds_%s_%s_neurons' % (
                        self.save_path, self.analysis_str, self.prefix_str,
                        self.n_folds, self.params_str, neurons_str)
                    if not self.development_flag:
                        file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
                            previous_model_str, sort, etBinIdx, etaBinIdx,
                            self._layerNumber)
                        #file_name = '%s_fold_%i_model.h5'%(previous_model_str,ifold)
                    else:
                        file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
                            previous_model_str, sort, etBinIdx, etaBinIdx)
                        #file_name = '%s_fold_%i_model_dev.h5'%(previous_model_str,ifold)

                    # Check if previous layer model was trained
                    if not os.path.exists(file_name):
                        self.trainLayer(data=data,
                                        trgt=trgt,
                                        transformed_data=transformed_data,
                                        ifold=ifold,
                                        hidden_neurons=hidden_neurons[:ilayer],
                                        layer=ilayer,
                                        folds_sweep=True)

                    custom_obj = {}
                    if self._aetype == 'contractive':
                        from TuningTools.MetricsLosses import contractive_loss
                        custom_obj['contractive_loss'] = contractive_loss(
                            hidden_neurons[layer - 1], data.shape[1],
                            self.trn_params.params['hidden_activation'],
                            self.trn_params.params['output_activation'])
                    else:
                        custom_obj[
                            self.trn_params.params['loss']] = self.lossFunction
                    #print "Loading Model: "+file_name
                    layer_model = load_model(file_name,
                                             custom_objects=custom_obj)
                    #layer_model = load_model(file_name, custom_objects={'%s'%self.trn_params.params['loss']: self.lossFunction})

                    get_layer_output = K.function(
                        [layer_model.layers[0].input],
                        [layer_model.layers[1].output])
                    # Projection of layer
                    proj_all_data = get_layer_output([proj_all_data])[0]

                # Weight penalties must be wired into the Dense layer;
                # assigning to model.regularizers is a no-op in Keras 2.
                kernel_reg = None
                if regularizer == "l1":
                    kernel_reg = l1(regularizer_param)
                elif regularizer == "l2":
                    kernel_reg = l2(regularizer_param)
                model.add(
                    Dense(hidden_neurons[layer - 1],
                          input_dim=proj_all_data.shape[1],
                          kernel_regularizer=kernel_reg,
                          name='encoded'))
                model.add(
                    Activation(self.trn_params.params['hidden_activation']))
                if regularizer == "dropout":
                    model.add(Dropout(regularizer_param))
                model.add(Dense(proj_all_data.shape[1]))
                model.add(
                    Activation(self.trn_params.params['output_activation']))
                #norm_data = proj_all_data
                data = proj_all_data
            # end of elif layer > 1:

            if self._aetype == 'contractive':

                def contractive_loss(y_true, y_pred):  # Keras passes (y_true, y_pred)
                    lam = 1e-4
                    mse = K.mean(K.square(y_true - y_pred), axis=1)

                    # Use the layer's kernel tensor so the penalty follows the
                    # live weights; K.variable(get_weights()) would freeze a
                    # snapshot taken at compile time.
                    W = model.get_layer('encoded').kernel  # N x N_hidden
                    W = K.transpose(W)  # N_hidden x N
                    h = model.get_layer('encoded').output
                    dh = h * (1 - h)  # sigmoid derivative; assumes a sigmoid hidden activation

                    # N_batch x N_hidden * N_hidden x 1 = N_batch x 1
                    contractive = lam * K.sum(dh**2 * K.sum(W**2, axis=1),
                                              axis=1)

                    return mse + contractive

                usedloss = contractive_loss
            else:
                usedloss = self.lossFunction
            model.compile(
                loss=usedloss,  #self.lossFunction,
                optimizer=self.optmizer,
                metrics=self.trn_params.params['metrics'])

            # Train model
            earlyStopping = callbacks.EarlyStopping(
                monitor='val_loss',
                patience=5,  #self.trn_params.params['patience'],
                verbose=self.trn_params.params['train_verbose'],
                mode='auto')

            ae_encoding_name = '%s_%i_folds_%s_%s_neurons' % (
                self.prefix_str, self.n_folds, self.params_str, neurons_str)
            ae_encoding = ae_encoding_name.split('_')
            ae_encoding_string = ae_encoding[0] + '_' + ae_encoding[
                24] + '_sort_%i_et_%i_eta_%i_layer_%i' % (
                    sort, etBinIdx, etaBinIdx, self._layerNumber)
            tbCallBack = keras.callbacks.TensorBoard(
                log_dir='/home/caducovas/tensorboard/EncodingError/' +
                ae_encoding_string,
                histogram_freq=1,
                write_graph=True,
                write_images=True,
                write_grads=True,
                update_freq='batch')

            import time
            import datetime
            start_run = time.time()
            if self._aetype == 'discriminant':
                init_trn_desc = model.fit(
                    data,
                    transformed_data,
                    epochs=300,  #self.trn_params.params['n_epochs'],
                    batch_size=1024,  #self.trn_params.params['batch_size'],
                    callbacks=[earlyStopping],  #, tbCallBack],
                    verbose=1)

            else:
                init_trn_desc = model.fit(
                    data,
                    data,
                    epochs=self.trn_params.params['n_epochs'],
                    batch_size=1024,  #self.trn_params.params['batch_size'],
                    callbacks=[earlyStopping],  #, tbCallBack],
                    verbose=1,  #self.trn_params.params['verbose'],
                    validation_data=(trgt, trgt))

            end_run = time.time()
            print 'Model took ' + str(
                datetime.timedelta(seconds=(end_run -
                                            start_run))) + ' to finish.'

            if self._aetype == 'discriminant':
                classifier = model
                trn_desc['epochs'] = init_trn_desc.epoch
                trn_desc['loss'] = init_trn_desc.history['loss']
            else:
                if np.min(init_trn_desc.history['val_loss']) < best_loss:
                    best_init = i_init
                    best_loss = np.min(init_trn_desc.history['val_loss'])
                    classifier = model
                    trn_desc['epochs'] = init_trn_desc.epoch

                    #for imetric in range(len(self.trn_params.params['metrics'])):
                    #    if self.trn_params.params['metrics'][imetric] == 'kullback_leibler_divergence':
                    #        metric = kullback_leibler_divergence
                    #    else:
                    #        metric = self.trn_params.params['metrics'][imetric]
                    #    trn_desc[metric] = init_trn_desc.history[metric]
                    #    trn_desc['val_'+metric] = init_trn_desc.history['val_'+metric]

                    trn_desc['loss'] = init_trn_desc.history['loss']
                    trn_desc['val_loss'] = init_trn_desc.history['val_loss']
                    trn_desc[
                        'kullback_leibler_divergence'] = init_trn_desc.history[
                            'kullback_leibler_divergence']
                    trn_desc[
                        'val_kullback_leibler_divergence'] = init_trn_desc.history[
                            'val_kullback_leibler_divergence']

        model.summary()
        # save model
        if not self.development_flag:
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_model.h5' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            #file_name = '%s_fold_%i_model.h5'%(model_str,ifold)
            classifier.save(file_name)
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i_trn_desc.jbl' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            #file_name = '%s_fold_%i_trn_desc.jbl'%(model_str,ifold)
            joblib.dump([trn_desc], file_name, compress=9)

            file_name_prefix = '%s_sort_%i_etbin_%i_etabin_%i_layer_%i' % (
                model_str, sort, etBinIdx, etaBinIdx, self._layerNumber)
            # The with-statement already closes the file on exit.
            with open(self.save_path + tuning_folder, 'a+') as t_file:
                t_file.write(file_name_prefix + "\n")

        else:
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_model_dev.h5' % (
                model_str, sort, etBinIdx, etaBinIdx)
            #file_name = '%s_fold_%i_model_dev.h5'%(model_str,ifold)
            classifier.save(file_name)
            file_name = '%s_sort_%i_etbin_%i_etabin_%i_trn_desc_dev.jbl' % (
                model_str, sort, etBinIdx, etaBinIdx)
            #file_name = '%s_fold_%i_trn_desc_dev.jbl'%(model_str,ifold)
            joblib.dump([trn_desc], file_name, compress=9)
        return ifold, classifier, trn_desc
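The inline contractive_loss above penalizes the encoder's sensitivity to its input: it adds lam times the squared Frobenius norm of the Jacobian of the hidden code to the MSE, which for a sigmoid unit reduces to sum_j (h_j(1 - h_j))^2 * sum_i W_ij^2 per sample. A standalone sketch, assuming a sigmoid 'encoded' layer and a TensorFlow-1-style Keras 2 backend (the architecture and file name below are illustrative):

from keras.models import Sequential, load_model
from keras.layers import Dense
from keras import backend as K

model = Sequential()
model.add(Dense(64, input_dim=400, activation='sigmoid', name='encoded'))
model.add(Dense(400, activation='linear'))

def contractive_loss(y_true, y_pred, lam=1e-4):
    mse = K.mean(K.square(y_true - y_pred), axis=1)
    W = K.transpose(model.get_layer('encoded').kernel)  # n_hidden x n_in
    h = model.get_layer('encoded').output               # batch x n_hidden
    dh = h * (1 - h)                                    # sigmoid derivative
    # Squared Frobenius norm of the encoder Jacobian, per sample
    return mse + lam * K.sum(dh ** 2 * K.sum(W ** 2, axis=1), axis=1)

model.compile(loss=contractive_loss, optimizer='adam')
# A model compiled with a custom loss must be reloaded with custom_objects,
# which is why both trainLayer() variants pass it to load_model():
# reloaded = load_model('ae.h5', custom_objects={'contractive_loss': contractive_loss})

Because dh = h * (1 - h) is the derivative of a sigmoid, the penalty is only correct for a sigmoid encoding activation; a tanh or relu encoder would need a different dh.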