# Imports required by the snippets below. ROCDiscovery is part of
# dreamtools (dreamtools.core.rocs); the import paths for the Challenge
# base class and easydev's progress_bar are assumptions.
import numpy as np
import pandas as pd

from dreamtools.core.challenge import Challenge   # assumed module path
from dreamtools.core.rocs import ROCDiscovery
from easydev import progress_bar                   # assumed import path


def compute_statistics(self):
    """Returns final results of the user prediction.

    :return: a dataframe with various metrics for each transcription
        factor. Must call :meth:`score` before.
    """
    data = {
        'Pearson': [], 'Spearman': [], 'Pearson_Log': [],
        'AUROC_8mer': [], 'AUPR_8mer': [],
        'AUROC_probe': [], 'AUPR_probe': []
    }

    pb = progress_bar(self.Ntf, interval=1)
    for tf_index in range(1, self.Ntf + 1):
        dfdata = pd.read_csv(self._setfile(tf_index, "Data"), sep='\t',
                             header=None)

        # correlations between the two columns of the per-TF data file
        pearson = dfdata.corr('pearson').iloc[0, 1]
        spearman = dfdata.corr('spearman').iloc[0, 1]
        pearsonLog = np.log10(dfdata).corr('pearson').iloc[0, 1]
        data['Pearson'].append(pearson)
        data['Pearson_Log'].append(pearsonLog)
        data['Spearman'].append(spearman)

        # ROC statistics on the 8-mer discovery vector
        dvdata = self._dvs[tf_index]
        r = ROCDiscovery(dvdata.values)
        rocdata = r.get_statistics()
        data['AUROC_8mer'].append(r.compute_auc(roc=rocdata))
        data['AUPR_8mer'].append(r.compute_aupr(roc=rocdata))

        # ROC statistics on the probe discovery vector
        dvdata = self._dvps[tf_index]
        r = ROCDiscovery(dvdata.values)
        rocdata = r.get_statistics()
        data['AUROC_probe'].append(r.compute_auc(roc=rocdata))
        data['AUPR_probe'].append(r.compute_aupr(roc=rocdata))

        pb.animate(tf_index)

    df = pd.DataFrame(data)
    df = df[['Pearson', 'Spearman', 'Pearson_Log',
             'AUROC_8mer', 'AUPR_8mer',
             'AUROC_probe', 'AUPR_probe']]
    return df
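
# Illustration (not part of the challenge code): the three correlation
# metrics above are plain pandas correlations between the two columns of
# each per-TF "Data" file, read with header=None. A minimal sketch on
# synthetic data; the two-column layout (measured vs predicted intensity)
# is an assumption based on the .iloc[0, 1] lookups above.
def _demo_correlation_metrics():
    rng = np.random.RandomState(0)
    measured = rng.lognormal(mean=2.0, sigma=0.5, size=100)
    predicted = measured * rng.lognormal(mean=0.0, sigma=0.2, size=100)
    dfdata = pd.DataFrame({0: measured, 1: predicted})

    pearson = dfdata.corr('pearson').iloc[0, 1]
    spearman = dfdata.corr('spearman').iloc[0, 1]
    pearson_log = np.log10(dfdata).corr('pearson').iloc[0, 1]
    return pearson, spearman, pearson_log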
class D5C1(Challenge):
    """A class dedicated to the D5C1 challenge::

        from dreamtools import D5C1
        s = D5C1()
        filename = s.download_template()
        s.score(filename)

    Data and templates are downloaded from Synapse. You must have a login.
    """
    def __init__(self, verbose=True, download=True, **kargs):
        """.. rubric:: constructor"""
        super(D5C1, self).__init__('D5C1', verbose, download)
        self._init()
        self.sub_challenges = []

    def _init(self):
        if self._standalone is True:
            return
        # download data files from Synapse if required
        self._download_data('AUPR.mat', 'syn4560154')
        self._download_data('AUROC.mat', 'syn4560158')
        self._download_data('DREAM5_EAR_GoldStandard.tsv', 'syn4560182')
        self._download_data('DREAM5_EAR_myteam_Predictions.txt', 'syn4560167')

    def download_template(self):
        # returns the full path to a template file
        return self.get_pathname('DREAM5_EAR_myteam_Predictions.txt')

    def download_goldstandard(self):
        # returns the full path to the gold standard file
        return self.get_pathname('DREAM5_EAR_GoldStandard.tsv')

    def _load_proba(self):
        import scipy.io
        self.auroc = scipy.io.loadmat(self.get_pathname("AUROC.mat"))
        self.aupr = scipy.io.loadmat(self.get_pathname("AUPR.mat"))

    def score(self, filename):
        """:return: dictionary with AUROC/AUPR metrics, p-values and the final score."""
        self._load_proba()
        prediction = pd.read_csv(filename, sep='[ \t]', engine='python',
                                 header=None)
        gold = pd.read_csv(self.download_goldstandard(), sep='[ \t]',
                           engine='python', header=None)
        prediction.columns = ['sequence', 'value']
        gold.columns = ['sequence', 'value']

        # merge the prediction and gold standard on the sequence column
        data = pd.merge(prediction, gold, how='inner', on=['sequence'],
                        suffixes=['_pred', '_gold'])

        # sort by prediction (sort_values in recent pandas; fall back to the
        # deprecated sort() for old versions)
        try:
            data.sort_values(by=['value_pred'], ascending=False, inplace=True)
        except AttributeError:
            data.sort(columns=['value_pred'], ascending=False, inplace=True)
        data.columns = ['Sequence', 'prediction_values', 'prediction']
        self.data = data

        from dreamtools.core.rocs import ROCDiscovery
        self.roc = ROCDiscovery(self.data['prediction'])
        self.roc.get_statistics()
        auroc = self.roc.compute_auc()
        aupr = self.roc.compute_aupr()

        # p-values from the precomputed null distributions, then the overall
        # DREAM score: mean(-log10([P_AUROC, P_AUPR]))
        P_AUPR = self._probability(self.aupr['X'][0], self.aupr['Y'][0], aupr)
        P_AUROC = self._probability(self.auroc['X'][0], self.auroc['Y'][0], auroc)
        score = np.mean(-np.log10([P_AUROC, P_AUPR]))

        return {'auroc': auroc, 'aupr': aupr, 'pval_aupr': P_AUPR,
                'pval_auroc': P_AUROC, 'score': score}

    def _probability(self, X, Y, x):
        # rectangle-rule integral of the null density Y over values >= x
        dx = X[2] - X[1]
        return sum(Y[X >= x] * dx)
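
# Illustration (not part of the challenge code): the p-values used in
# D5C1.score() come from precomputed null distributions (AUROC.mat,
# AUPR.mat); _probability() is a right-tail rectangle-rule integral of the
# null density above the observed metric, and the final score is
# mean(-log10([P_AUROC, P_AUPR])). A minimal sketch with a synthetic
# Gaussian null; the grid, the null shape and the observed values 0.62 and
# 0.60 are made up for illustration only.
def _demo_empirical_pvalue_score():
    X = np.linspace(0.0, 1.0, 1001)             # metric values (bin centres)
    Y = np.exp(-0.5 * ((X - 0.5) / 0.05) ** 2)  # unnormalised null density
    Y /= np.sum(Y) * (X[1] - X[0])              # normalise to integrate to 1

    def probability(X, Y, x):
        # same right-tail integral as D5C1._probability
        dx = X[2] - X[1]
        return np.sum(Y[X >= x] * dx)

    p_auroc = probability(X, Y, 0.62)   # hypothetical observed AUROC
    p_aupr = probability(X, Y, 0.60)    # hypothetical observed AUPR
    score = np.mean(-np.log10([p_auroc, p_aupr]))
    return p_auroc, p_aupr, score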