示例#1
0
    def predict_with_existing(self,
                              X_train, y_train_regression, y_train_classification,
                              X_val, y_val_regression, y_val_classification,
                              X_test, y_test_regression, y_test_classification,
                              PMTNN_weight_file):
        model = self.setup_model_ensemble()
        model.load_weights(PMTNN_weight_file)

        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        y_pred_on_test = model.predict(X_test)

        print
        print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
        print
        print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
        print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
        print

        for EF_ratio in self.EF_ratio_list:
            n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return
    def predict_with_existing(self, X_train, y_train, X_val, y_val, X_test,
                              y_test, PMTNN_weight_file):
        model = self.setup_model()
        model.load_weights(PMTNN_weight_file)

        y_pred_on_train = model.predict(X_train)[:, -1]
        y_train = y_train[:, -1]
        y_pred_on_val = model.predict(X_val)[:, -1]
        y_val = y_val[:, -1]
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)[:, -1]
            y_test = y_test[:, -1]
        print y_train.shape, '\t', y_pred_on_test.shape

        print
        print('train precision: {}'.format(
            precision_auc_single(y_train, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(
            bedroc_auc_single(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(
            precision_auc_single(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val,
                                                         y_pred_on_val)))
        print('validation bedroc: {}'.format(
            bedroc_auc_single(y_val, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(
                precision_auc_single(y_test, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test,
                                                       y_pred_on_test)))
            print('test bedroc: {}'.format(
                bedroc_auc_single(y_test, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(
                    y_test, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(
                    EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return
    def train_and_predict(self,
                          X_train, y_train_regression, y_train_classification,
                          X_val, y_val_regression, y_val_classification,
                          X_test, y_test_regression, y_test_classification,
                          PMTNN_weight_file):
        model = self.setup_model()
        sw = get_sample_weight(self, y_train_regression)
        print 'Sample Weight\t', sw

        model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer)
        model.fit(x=X_train, y=y_train_regression,
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  sample_weight=sw,
                  validation_data=[X_val, y_val_regression],
                  shuffle=True)
        model.save_weights(PMTNN_weight_file)

        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)

        print
        print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
            print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return
示例#4
0
    def get_rf(self, X_train, y_train, X_val, y_val, X_test, y_test):
        max_features = 'log2'
        n_estimators = 4000
        min_samples_leaf = 1
        class_weight = 'balanced'
        rnd_state = 1337
        np.random.seed(seed=rnd_state)

        rf = RandomForestClassifier(n_estimators=n_estimators,
                                    max_features=max_features,
                                    min_samples_leaf=min_samples_leaf,
                                    n_jobs=3,
                                    class_weight=class_weight,
                                    random_state=rnd_state,
                                    oob_score=False,
                                    verbose=1)
        rf.fit(X_train, y_train)

        y_pred_on_train = reshape_data_into_2_dim(rf.predict(X_train))
        y_pred_on_val = reshape_data_into_2_dim(rf.predict(X_val))
        y_pred_on_test = reshape_data_into_2_dim(rf.predict(X_test))

        print('train precision: {}'.format(precision_auc_single(y_train, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val, y_pred_on_val)))
        print
        print('test precision: {}'.format(precision_auc_single(y_test, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test, y_pred_on_test)))
        print('test bedroc: {}'.format(bedroc_auc_single(y_test, y_pred_on_test)))
        print

        for EF_ratio in self.EF_ratio_list:
            n_actives, ef, ef_max = enrichment_factor_single(y_test, y_pred_on_test, EF_ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return rf
示例#5
0
    def get_EF_score_with_existing_model(self,
                                         X_test, y_test, y_test_classification,
                                         file_path, EF_ratio):
        model = self.setup_model_ensemble()
        model.load_weights(file_path)
        y_pred_on_test = model.predict(X_test)
        print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
        print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
        print

        n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
        print('EF: {},\tactive: {}'.format(ef, n_actives))

        return
def get_EF_values_single_task(task, X_test, y_test, model_weight,
                              EF_ratio_list):
    model = task.setup_model()
    model.load_weights(model_weight)
    y_pred_on_test = model.predict(X_test)

    print('test precision: {}'.format(
        precision_auc_single(y_test, y_pred_on_test)))
    print('test roc: {}'.format(roc_auc_single(y_test, y_pred_on_test)))
    print('test bedroc: {}'.format(bedroc_auc_single(y_test, y_pred_on_test)))
    print

    ef_values = []
    ef_max_values = []
    for EF_ratio in EF_ratio_list:
        n_actives, ef, ef_max = enrichment_factor_single(
            y_test, y_pred_on_test, EF_ratio)
        ef_values.append(ef)
        ef_max_values.append(ef_max)
    return ef_values, ef_max_values
示例#7
0
    def train_and_predict(self,
                          X_train, y_train, y_train_classification,
                          X_val, y_val, y_val_classification,
                          X_test, y_test, y_test_classification,
                          mode):
        model = Sequential()
        conf = self.conf
        batch_normalizer_epsilon = conf['batch']['epsilon']
        batch_normalizer_mode = conf['batch']['mode']
        batch_normalizer_axis = conf['batch']['axis']
        batch_normalizer_momentum = conf['batch']['momentum']
        batch_normalizer_weights = conf['batch']['weights']
        batch_normalizer_beta_init = conf['batch']['beta_init']
        batch_normalizer_gamma_init = conf['batch']['gamma_init']
        batch_normalizer = BatchNormalization(epsilon=batch_normalizer_epsilon,
                                              mode=batch_normalizer_mode,
                                              axis=batch_normalizer_axis,
                                              momentum=batch_normalizer_momentum,
                                              weights=batch_normalizer_weights,
                                              beta_init=batch_normalizer_beta_init,
                                              gamma_init=batch_normalizer_gamma_init)

        if mode == 'classification':
            model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='relu'))
            model.add(Dropout(0.5))
            model.add(Dense(1024, init='glorot_normal', activation='relu'))
            model.add(Dropout(0.5))
            if self.batch_is_use:
                model.add(batch_normalizer)
            model.add(Dense(1, init='glorot_normal', activation='sigmoid'))
            model.compile(loss='binary_crossentropy', optimizer='adam')
        else:
            model.add(Dense(2048, input_dim=1024, init='glorot_normal', activation='sigmoid'))
            model.add(Dropout(0.5))
            model.add(Dense(1024, init='glorot_normal', activation='sigmoid'))
            model.add(Dropout(0.5))
            if self.batch_is_use:
                model.add(batch_normalizer)
            model.add(Dense(1, init='glorot_normal', activation='linear'))
            model.compile(loss='mse', optimizer='adam')

        model.fit(X_train, y_train,
                  batch_size=self.fit_batch_size,
                  nb_epoch=self.fit_nb_epoch,
                  verbose=self.fit_verbose,
                  validation_data=(X_val, y_val))

        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        y_pred_on_test = model.predict(X_test)

        print
        print 'this is mode ', mode
        print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
        print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
        print
        print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
        print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
        print
        print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
        print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
        print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
        print

        for EF_ratio in self.EF_ratio_list:
            n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
            print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return y_pred_on_test
示例#8
0
    def train_and_predict_ensemble(self,
                                   X_train, y_train_regression, y_train_classification,
                                   X_val, y_val_regression, y_val_classification,
                                   X_test, y_test_regression, y_test_classification,
                                   PMTNN_weight_file):
        model = self.setup_model_ensemble()
        # TODO: remove
        print model.summary()

        if self.weight_schema == 'weighted':
            loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 100.}
        elif self.weight_schema == 'no_weight':
            loss_weight = {'classification_output_layer': 1., 'regression_output_layer': 1.}
        else:
            raise ValueError('Wrong weight schema. Should be no_weight, or weighted.')

        model.compile(optimizer=self.compile_optimizer,
                      loss={'classification_output_layer': 'binary_crossentropy', 'regression_output_layer': 'mse'},
                      loss_weights=loss_weight)

        model.fit({'input_layer': X_train},
                  {'classification_output_layer': y_train_classification,
                   'regression_output_layer': y_train_regression},
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  validation_data=({'input_layer': X_val},
                                   {'classification_output_layer': y_val_classification,
                                    'regression_output_layer': y_val_regression}),
                  shuffle=True)
        model.save_weights(PMTNN_weight_file)

        y_pred_on_train_ensemble = np.array(model.predict(X_train))
        y_pred_on_val_ensmble = np.array(model.predict(X_val))
        y_pred_on_test_ensemble = np.array(model.predict(X_test))

        print
        print 'TreeNet Ensemble'

        mode_list = ['TreeNet classification', 'TreeNet regression']
        for mode in range(2):
            print
            print mode_list[mode]
            y_pred_on_train = y_pred_on_train_ensemble[mode]
            y_pred_on_val = y_pred_on_val_ensmble[mode]
            y_pred_on_test = y_pred_on_test_ensemble[mode]

            print('train precision: {}'.format(precision_auc_single(y_train_classification, y_pred_on_train)))
            print('train roc: {}'.format(roc_auc_single(y_train_classification, y_pred_on_train)))
            print('train bedroc: {}'.format(bedroc_auc_single(y_train_classification, y_pred_on_train)))
            print
            print('validation precision: {}'.format(precision_auc_single(y_val_classification, y_pred_on_val)))
            print('validation roc: {}'.format(roc_auc_single(y_val_classification, y_pred_on_val)))
            print('validation bedroc: {}'.format(bedroc_auc_single(y_val_classification, y_pred_on_val)))
            print
            print('test precision: {}'.format(precision_auc_single(y_test_classification, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test_classification, y_pred_on_test)))
            print('test bedroc: {}'.format(bedroc_auc_single(y_test_classification, y_pred_on_test)))
            print

            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(y_test_classification, y_pred_on_test, EF_ratio)
                print('ratio: {}, EF: {},\tactive: {}'.format(EF_ratio, ef, n_actives))

        return y_pred_on_test_ensemble
    def train_and_predict(self, X_train, y_train, X_val, y_val, X_test, y_test,
                          PMTNN_weight_file):
        model = self.setup_model()
        if self.early_stopping_option == 'auc':
            early_stopping = KeckCallBackOnROC(
                X_train,
                y_train,
                X_val,
                y_val,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        elif self.early_stopping_option == 'precision':
            early_stopping = KeckCallBackOnPrecision(
                X_train,
                y_train,
                X_val,
                y_val,
                patience=self.early_stopping_patience,
                file_path=PMTNN_weight_file)
            callbacks = [early_stopping]
        else:
            callbacks = []

        cw = get_class_weight(self, y_train)
        print 'cw ', cw

        model.compile(loss=self.compile_loss, optimizer=self.compile_optimizer)
        model.fit(X_train,
                  y_train,
                  nb_epoch=self.fit_nb_epoch,
                  batch_size=self.fit_batch_size,
                  verbose=self.fit_verbose,
                  class_weight=cw,
                  shuffle=True,
                  callbacks=callbacks)

        if self.early_stopping_option == 'auc' or self.early_stopping_option == 'precision':
            model = early_stopping.get_best_model()
        y_pred_on_train = model.predict(X_train)
        y_pred_on_val = model.predict(X_val)
        if X_test is not None:
            y_pred_on_test = model.predict(X_test)

        print
        print('train precision: {}'.format(
            precision_auc_single(y_train, y_pred_on_train)))
        print('train roc: {}'.format(roc_auc_single(y_train, y_pred_on_train)))
        print('train bedroc: {}'.format(
            bedroc_auc_single(y_train, y_pred_on_train)))
        print
        print('validation precision: {}'.format(
            precision_auc_single(y_val, y_pred_on_val)))
        print('validation roc: {}'.format(roc_auc_single(y_val,
                                                         y_pred_on_val)))
        print('validation bedroc: {}'.format(
            bedroc_auc_single(y_val, y_pred_on_val)))
        print
        if X_test is not None:
            print('test precision: {}'.format(
                precision_auc_single(y_test, y_pred_on_test)))
            print('test roc: {}'.format(roc_auc_single(y_test,
                                                       y_pred_on_test)))
            print('test bedroc: {}'.format(
                bedroc_auc_single(y_test, y_pred_on_test)))
            print

        if X_test is not None:
            for EF_ratio in self.EF_ratio_list:
                n_actives, ef, ef_max = enrichment_factor_single(
                    y_test, y_pred_on_test, EF_ratio)
                nef = ef / ef_max
                print('ratio: {}, EF: {},\tactive: {}'.format(
                    EF_ratio, ef, n_actives))
                print('ratio: {}, NEF: {}'.format(EF_ratio, nef))

        return