示例#1
0
    def test_cross_validation(self):
        """Run cross-validation for tissues 1-23 and collect predicted classes.

        One ``CrossValidation`` is built and validated per tissue number.
        The per-tissue results are then folded into a mapping from gene id
        to a list of predicted classes, with ``'?'`` where a tissue's results
        hold no entry for the gene.  Tissues whose results are falsy add no
        element to the lists.

        The mapping is only built; this method asserts nothing and returns
        nothing.
        """
        results_per_tissue = []
        for tissue_id in range(1, 24):
            validator = CrossValidation(
                genes=self.genes,
                all_gnids=self.gnids,
                class_size=self.class_size,
                fold_size=self.fold_size,
                kmer_size=self.kmer_size,
                exp_setting=self.exp_setting,
            )
            validator.build_datasets(
                assigned_genes=self.assigned_gnids,
                neg_class_mode=self.exp_setting.get_neg_class_mode(),
                corresp_tissue=tissue_id,
            )
            results_per_tissue.append(validator.validation())

        feature_vector = {}
        for gene_id in self.gnids:
            row = []
            for tissue_results in results_per_tissue:
                # Falsy results contribute nothing to the row.
                if not tissue_results:
                    continue
                entry = tissue_results.get(gene_id)
                row.append('?' if entry is None else entry.get_predicted_class())
            feature_vector[gene_id] = row
示例#2
0
文件: Data.py 项目: towardtruth/nbk
    def cross_validation(self):
        """Cross-validate every feature and store the outcome on the feature.

        For each entry of ``self.features`` a fresh ``CrossValidation`` is
        built from this object's settings, its datasets are built from the
        feature's assigned genes and corresponding tissue, and the validation
        is run.  The prediction results, and the confusion-matrix set returned
        by ``self.set_confusion_matrix``, are then stored on the feature.

        Progress is reported with ``print``.  Nothing is returned; when
        ``self.features`` is empty only the header line is printed.
        """
        print('Cross-Validation')
        for feature in self.features:
            print('Feature Name: {}, k-mer size: {}'.format(
                feature.name, self.kmer_size))

            # Read the tissue id once and reuse it below.
            tissue_id = feature.corresp_tissue

            # NOTE: all_gnids is passed as None.  A per-tissue lookup
            # (GetData.wd_all_gnid_per_tissue) was previously sketched here
            # as its source but is not used.
            validator = CrossValidation(
                genes=self.genes,
                all_gnids=None,
                class_size=self.class_size,
                fold_size=self.fold_size,
                kmer_size=self.kmer_size,
                exp_setting=self.exp_setting)

            validator.build_datasets(
                assigned_genes=feature.assigned_genes,
                neg_class_mode=self.exp_setting.get_neg_class_mode(),
                corresp_tissue=tissue_id)

            # Run the validation and keep its prediction results on the feature.
            # NOTE(review): the keyword is spelled 'prediction_restuls'; it has
            # to match the parameter name declared by set_prediction_results,
            # which is not visible here -- confirm before renaming.
            prediction_results = validator.validation()
            feature.set_prediction_results(
                prediction_restuls=prediction_results)

            # Keep the confusion-matrix set for this validation on the feature.
            feature.set_confusion_matrix_set(cm_set=self.set_confusion_matrix(
                validation=validator, fold_size=self.fold_size))
示例#3
0
    def test_prediction_results(self):
        """Run cross-validation for tissues 1-23 and print the per-gene results.

        One ``CrossValidation`` is built and validated per tissue number.
        The per-tissue results are folded into a mapping from gene id to a
        list of predicted classes, with ``'?'`` where a tissue's results hold
        no entry for the gene.  Tissues whose results are falsy add no element
        to the lists.

        For every tissue a ``Tissue#:`` header is then printed, followed by one
        record per gene: the gene id, the comma-joined prediction list and
        the class assigned to the gene in that tissue (``'?'`` when the
        tissue has no entry for the gene).  Each record is followed by a
        blank line because the format string itself ends with a newline.

        This method asserts nothing and returns nothing.
        """
        all_prediction_results = []
        for corresp_tissue in range(1, 24):
            cv = CrossValidation(genes=self.genes,
                                 all_gnids=self.gnids,
                                 class_size=self.class_size,
                                 fold_size=self.fold_size,
                                 kmer_size=self.kmer_size,
                                 exp_setting=self.exp_setting)
            cv.build_datasets(assigned_genes=self.assigned_gnids,
                              neg_class_mode=self.exp_setting.get_neg_class_mode(),
                              corresp_tissue=corresp_tissue)
            all_prediction_results.append(cv.validation())

        feature_vector = {}
        for gnid in self.gnids:
            gnid_vector = []
            for prediction_results in all_prediction_results:
                # Falsy results contribute nothing to the vector.
                if not prediction_results:
                    continue
                gnid_pr = prediction_results.get(gnid)
                gnid_vector.append(
                    '?' if gnid_pr is None else gnid_pr.get_predicted_class())
            feature_vector[gnid] = gnid_vector

        # The joined vector does not depend on the tissue, so build it once
        # per gene instead of once per (tissue, gene) pair.
        joined_vectors = {
            gnid: ",".join(str(value) for value in vector)
            for gnid, vector in feature_vector.items()
        }

        # show feature_vector
        for tissue, prediction_results in enumerate(all_prediction_results,
                                                    start=1):
            print('Tissue#:', tissue)
            for gnid, line in joined_vectors.items():
                # Apply the same falsy guard as the aggregation above, so a
                # tissue without results yields '?' rather than failing on
                # the lookup.
                p_results = (prediction_results.get(gnid)
                             if prediction_results else None)
                if p_results is None:
                    data_label = '?'
                else:
                    data_label = 'data_label:{}'.format(p_results.get_assigned_class())
                print("%s,%s,%s\n" % (gnid, line, data_label))