def test_cross_validation(self):
    """Run cross-validation for tissue ids 1..23 and build per-gene vectors.

    For every tissue id a fresh ``CrossValidation`` is created over
    ``self.gnids``, its datasets are built from ``self.assigned_gnids`` and
    the validation results are collected.  The collected results are then
    folded into a mapping from gene id to a list of predicted classes, with
    ``'?'`` standing in where a result set has no entry for the gene.

    Nothing is asserted or returned; the method only exercises the pipeline.
    """
    results_per_tissue = []
    for tissue_id in range(1, 24):
        validator = CrossValidation(
            genes=self.genes,
            all_gnids=self.gnids,
            class_size=self.class_size,
            fold_size=self.fold_size,
            kmer_size=self.kmer_size,
            exp_setting=self.exp_setting,
        )
        validator.build_datasets(
            assigned_genes=self.assigned_gnids,
            neg_class_mode=self.exp_setting.get_neg_class_mode(),
            corresp_tissue=tissue_id,
        )
        results_per_tissue.append(validator.validation())

    def predicted_label(results, gene_id):
        # '?' marks a gene that has no entry in this result set.
        record = results.get(gene_id)
        return '?' if record is None else record.get_predicted_class()

    # Falsy (e.g. empty) result sets contribute no element to a gene's
    # vector, so a vector can be shorter than the number of tissues.
    feature_vector = {
        gene_id: [
            predicted_label(results, gene_id)
            for results in results_per_tissue
            if results
        ]
        for gene_id in self.gnids
    }
def cross_validation(self):
    """Cross-validate every feature and store the outcome on the feature.

    For each entry of ``self.features`` a ``CrossValidation`` is created,
    its datasets are built from the feature's assigned genes and
    corresponding tissue, and validation is run.  The prediction results
    and the confusion-matrix set (computed by ``self.set_confusion_matrix``)
    are then stored on the feature via its setters.

    Progress is reported on stdout.  Returns ``None``.
    """
    print('Cross-Validation')
    for feature in self.features:
        print('Feature Name: {}, k-mer size: {}'.format(
            feature.name, self.kmer_size))

        # NOTE: all_gnids is deliberately None here; the per-tissue gene-id
        # lookup (GetData.wd_all_gnid_per_tissue) is not used by this method.
        cr_validation = CrossValidation(
            genes=self.genes,
            all_gnids=None,
            class_size=self.class_size,
            fold_size=self.fold_size,
            kmer_size=self.kmer_size,
            exp_setting=self.exp_setting)

        # Build the validation groups for this feature.
        cr_validation.build_datasets(
            assigned_genes=feature.assigned_genes,
            neg_class_mode=self.exp_setting.get_neg_class_mode(),
            corresp_tissue=feature.corresp_tissue)

        # Run the validation and keep its prediction results.
        prediction_results = cr_validation.validation()

        # NOTE(review): the keyword is spelled 'prediction_restuls' exactly as
        # in the original call; presumably it matches the setter's parameter
        # name -- confirm before renaming.
        feature.set_prediction_results(
            prediction_restuls=prediction_results)

        # Store the confusion matrices derived from this validation run.
        feature.set_confusion_matrix_set(cm_set=self.set_confusion_matrix(
            validation=cr_validation, fold_size=self.fold_size))
def test_prediction_results(self):
    """Run cross-validation for tissue ids 1..23 and print the results.

    For every tissue id a ``CrossValidation`` is created over ``self.gnids``
    and validated; the per-tissue results are folded into a mapping from
    gene id to a list of predicted classes (``'?'`` where a result set has
    no entry for the gene).  Then, for each tissue, one line per gene is
    printed: gene id, the comma-joined vector, and the class assigned to the
    gene in that tissue's results (``'?'`` if the gene is absent).

    Nothing is asserted or returned; the output goes to stdout.
    """
    # Single definition of the tissue ids, shared by both loops below so the
    # result list and the display loop cannot drift apart.
    tissue_ids = range(1, 24)

    all_prediction_results = []
    for corresp_tissue in tissue_ids:
        cv = CrossValidation(
            genes=self.genes,
            all_gnids=self.gnids,
            class_size=self.class_size,
            fold_size=self.fold_size,
            kmer_size=self.kmer_size,
            exp_setting=self.exp_setting)
        cv.build_datasets(
            assigned_genes=self.assigned_gnids,
            neg_class_mode=self.exp_setting.get_neg_class_mode(),
            corresp_tissue=corresp_tissue)
        all_prediction_results.append(cv.validation())

    feature_vector = {}
    for gnid in self.gnids:
        gnid_vector = []
        for prediction_results in all_prediction_results:
            # Falsy (e.g. empty) result sets add no element, so a vector
            # can be shorter than the number of tissues.
            if not prediction_results:
                continue
            gnid_pr = prediction_results.get(gnid)
            gnid_vector.append(
                '?' if gnid_pr is None else gnid_pr.get_predicted_class())
        feature_vector[gnid] = gnid_vector

    # The joined vector does not depend on the tissue being displayed, so
    # build it once per gene instead of once per (tissue, gene) pair.
    vector_lines = {
        gnid: ",".join(str(value) for value in vector)
        for gnid, vector in feature_vector.items()
    }

    # show feature_vector
    for tissue, tissue_results in zip(tissue_ids, all_prediction_results):
        print('Tissue#:', tissue)
        for gnid, line in vector_lines.items():
            p_results = tissue_results.get(gnid)
            if p_results is None:
                data_label = '?'
            else:
                data_label = 'data_label:{}'.format(
                    p_results.get_assigned_class())
            print("%s,%s,%s\n" % (gnid, line, data_label))
def test_features_dataset(self):
    """Build the cross-validation datasets of every feature and test them.

    For each entry of ``self.features`` the gene ids for the feature's
    corresponding tissue are fetched with ``GetData.wd_all_gnid_per_tissue``,
    a ``CrossValidation`` is created over them, its datasets are built from
    the feature's assigned genes, and ``test_datasets()`` is invoked on it.

    Returns ``None``.
    """
    print('TEST features dataset')
    for feature in self.features:
        # Gene ids restricted to the tissue this feature corresponds to.
        scid = feature.corresp_tissue
        wd_all_gnids_per_tissue = GetData.wd_all_gnid_per_tissue(
            self.exp_setting, scid)

        # exp_setting is passed explicitly, matching every other
        # CrossValidation construction in this class.
        cr_validation = CrossValidation(
            genes=self.genes,
            all_gnids=wd_all_gnids_per_tissue,
            class_size=self.class_size,
            fold_size=self.fold_size,
            kmer_size=self.kmer_size,
            exp_setting=self.exp_setting)

        cr_validation.build_datasets(
            assigned_genes=feature.assigned_genes,
            neg_class_mode=self.exp_setting.get_neg_class_mode(),
            corresp_tissue=scid)

        cr_validation.test_datasets()