def plotFigure(score_data=None, release=True):
    """Plot ROC and precision-recall curves of eight predictors per dataset.

    One two-panel figure is created for each of the five datasets (panel "a"
    holds the ROC curves, panel "b" the precision-recall curves) and saved
    as ``Output/Supplementary/FigureS14`` to ``FigureS18``.

    Args:
        score_data: Data container; must offer ``selectDataset``,
            ``getTrueLabels`` and ``getScores``. The ``None`` default is not
            usable: the first method call on it fails.
        release: If true only the PDF is written, otherwise PDF, TIFF
            (300 dpi) and JPG are written.
    """
    pl.ion()

    font_size = 13
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.dpi'] = 150
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 2
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'black'

    color_t = ['#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA',
               '#BC8DBF', '#F6989D', '#FFF79A', '#998675', '#A4A4A4',
               '#5AFF00', '#29A3A3', '#F53DD6', '#F2800D', '#3399FF']

    # (score key, legend name, index into color_t), listed in legend order.
    tools = [
        ('fathmm_w', "FatHMM-W", 5),
        ('logit_condel_new', "Logit+", 14),
        ('condel_new', "Condel+", 13),
        ('logit_condel_old', "Logit", 12),
        ('condel_old', "Condel", 11),
        ('polyphen2', "PolyPhen-2", 1),
        ('mutationassessor', "MutationAssessor", 8),
        ('sift', "SIFT", 2),
    ]

    def _prepare_axes(position):
        # Create one subplot with unit limits and only the left/bottom spines.
        ax = pl.subplot(position)
        pl.grid(True)
        ax.set_xlim([0, 1])
        ax.set_ylim([0, 1])
        # pl.subplot already returns the Axes, so the deprecated
        # Artist.get_axes() round trip is not needed.
        for spine in ('top', 'right'):
            ax.spines[spine].set_visible(False)
        ax.xaxis.set_ticks_position('bottom')
        ax.yaxis.set_ticks_position('left')
        return ax

    def _draw_curves(ax, plot_func, stats):
        # Draw one curve per tool with plot_func and attach the legend.
        # plot_func (plot_roc / plot_roc_pr, defined elsewhere) receives the
        # running legend list and returns the updated one.
        legend = []
        for (_, name, color_idx), result in zip(tools, stats):
            legend = plot_func(results=result,
                               name=name,
                               legend=legend,
                               color=color_t[color_idx])
        # loc is passed by keyword: current Matplotlib reads two positional
        # arguments as (handles, labels), not (labels, loc).
        leg = ax.legend(legend, loc='lower right', numpoints=1,
                        prop={'size': 12}, fancybox=True)
        leg.get_frame().set_alpha(0.5)

    # load data
    val = utils.Validation()

    datasets = ['humvar', 'exovar', 'varibench_selected',
                'predictSNP_selected', 'swissvar_selected']
    fig_names = ['FigureS14', 'FigureS15', 'FigureS16', 'FigureS17',
                 'FigureS18']
    for dataset, fig_name in zip(datasets, fig_names):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()
        print("\tCreating ROC and ROC-PR curves for dataset:  " + dataset + " (" + fig_name + ")")

        # The same statistics feed both panels, so compute them only once
        # per tool instead of once per panel.
        stats = [val.getROCStats(labels, score_data.getScores(key))
                 for key, _, _ in tools]

        pl.figure(figsize=(12, 6))

        # Panel a: ROC curves plus the chance diagonal.
        roc_ax = _prepare_axes(121)
        _draw_curves(roc_ax, plot_roc, stats)
        roc_ax.set_xlabel("False Positive Rate or (1-Specificity)")
        roc_ax.set_ylabel("True Positive Rate or (Sensitivity)")
        roc_ax.plot([0, 1], [0, 1], '--', color='#ACACAC')
        roc_ax.text(-0.1, 1.05, "a", fontsize=14, fontweight='bold',
                    va='top', transform=roc_ax.transAxes)

        # Panel b: precision-recall curves.
        pr_ax = _prepare_axes(122)
        _draw_curves(pr_ax, plot_roc_pr, stats)
        pr_ax.set_xlabel("Recall")
        pr_ax.set_ylabel("Precision")
        pr_ax.text(-0.1, 1.05, "b", fontsize=14, fontweight='bold',
                   va='top', transform=pr_ax.transAxes)

        pl.subplots_adjust(left=0.05, bottom=0.08, right=0.99, top=0.93,
                           wspace=0.11)

        # The PDF is always written; the raster formats only outside release.
        target = 'Output/Supplementary/' + fig_name
        pl.savefig(os.path.abspath(target + '.pdf'))
        if not release:
            pl.savefig(os.path.abspath(target + '.tiff'), dpi=300)
            pl.savefig(os.path.abspath(target + '.jpg'))
        pl.close()
# Example #2
0
def plotFigure(score_data=None, release=True):
    """Plot per-category AUC values of ten predictors for three datasets.

    For each of the datasets ``varibench_selected``, ``predictSNP_selected``
    and ``swissvar_selected`` the AUC of every predictor is computed for the
    categories ``cat1`` .. ``cat5`` and drawn as one line per predictor in
    its own panel (a, b, c). The figure is saved as
    ``Output/Supplementary/FigureS12``.

    Args:
        score_data: Data container; must offer ``selectDataset``,
            ``loadCategories``, ``getData4Categorie``, ``getTrueLabels`` and
            ``getScores``. The ``None`` default is not usable: the first
            method call on it fails.
        release: If true only the PDF is written, otherwise PDF, TIFF
            (300 dpi) and JPG are written.
    """
    pl.ion()

    font_size = 10
    mpl.rcParams['font.family'] = "sans-serif"
    mpl.rcParams['font.sans-serif'] = "Arial"
    mpl.rcParams['font.size'] = font_size
    mpl.rcParams['font.weight'] = 'medium'
    mpl.rcParams['figure.dpi'] = 150
    mpl.rcParams['figure.facecolor'] = 'white'
    mpl.rcParams['lines.linewidth'] = 2
    mpl.rcParams['axes.facecolor'] = 'white'
    mpl.rcParams['patch.edgecolor'] = 'black'

    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA', '#BC8DBF',
        '#F6989D', '#FFF79A', '#998675', '#A4A4A4'
    ]

    # (score key, legend label, line/marker format, index into color_t),
    # listed in drawing (and therefore legend) order.
    tools = [
        ('fathmm_w', 'FatHMM-W', 'o-', 0),
        ('fathmm_u', 'FatHMM-U', 'h-', 1),
        ('mutationtaster', 'MT2', 'x-', 2),
        ('mutationassessor', 'MASS', '<-', 8),
        ('polyphen2', 'PP2', '>-', 3),
        ('CADD', 'CADD', 'D-', 9),
        ('sift', 'SIFT', '1-', 4),
        ('lrt', 'LRT', '2-', 5),
        ('gerp++', 'GERP++', '3-', 6),
        # Label was misspelled 'pyhloP' before.
        ('phylop', 'phyloP', '4-', 7),
    ]

    # (dataset, subplot position, y limits, y ticks, panel letter, title,
    #  draw legend?)
    panels = [
        ('varibench_selected', 311, (0.53, 0.7), [0.53, 0.6, 0.7],
         "a", "VariBenchSelected", True),
        ('predictSNP_selected', 312, (0.55, 0.8), [0.55, 0.6, 0.7, 0.8],
         "b", "predictSNPSelected", False),
        ('swissvar_selected', 313, (0.55, 0.73), [0.55, 0.6, 0.7, 0.75],
         "c", "SwissVarSelected", False),
    ]

    # One x tick label per category.
    tick_labels = [
        ']0.0,1.0[',
        '[0.1,0.9]',
        '[0.2,0.8]',
        '[0.3,0.7]',
        '[0.4,0.6]',
    ]

    val = utils.Validation()
    categories = ['cat1', 'cat2', 'cat3', 'cat4', 'cat5']
    n = len(categories)

    def _auc(tool, cat):
        # AUC of one tool for one category of the selected dataset.
        if cat == "all":
            # Whole-dataset path; not reached with the categories above.
            labels = score_data.getTrueLabels()
            scores = score_data.getScores(tool)
        else:
            labels, scores = score_data.getData4Categorie(category=cat,
                                                          tool_name=tool)
        return val.getROCStats(labels, scores)['auc']

    def _category_aucs(dataset):
        # Map every tool key to its array of per-category AUC values.
        score_data.selectDataset(dataset)
        score_data.loadCategories()
        aucs = {}
        for key, _, _, _ in tools:
            values = sp.zeros(n)
            for idx, cat in enumerate(categories):
                values[idx] = _auc(key, cat)
            aucs[key] = values
        return aucs

    def _draw_panel(position, aucs, y_limits, y_ticks, letter, title,
                    with_legend):
        # Draw one dataset panel: one line per tool over the categories.
        ax = pl.subplot(position)
        for key, _, fmt, color_idx in tools:
            pl.plot(aucs[key], fmt, color=color_t[color_idx])
        pl.ylim(*y_limits)
        pl.xlim(-.1, 4.1)
        pl.yticks(y_ticks)
        pl.grid(axis='y')
        pl.ylabel("AUC")
        pl.xticks(sp.arange(len(tick_labels)), tick_labels,
                  fontsize=font_size, rotation=90)
        if with_legend:
            leg = pl.legend([label for _, label, _, _ in tools],
                            loc='upper right',
                            fancybox=True,
                            ncol=5,
                            prop={'size': 10},
                            numpoints=1)
            leg.get_frame().set_alpha(0.2)
            leg.get_frame().set_edgecolor("none")
        ax.text(-0.05,
                1.02,
                letter,
                fontsize=15,
                fontweight="bold",
                va="top",
                transform=ax.transAxes)
        pl.title(title)
        remove_border()

    # Gather all numbers first, then draw the three stacked panels.
    aucs_per_panel = [_category_aucs(panel[0]) for panel in panels]

    pl.figure(figsize=(12, 8))
    for panel, aucs in zip(panels, aucs_per_panel):
        _, position, y_limits, y_ticks, letter, title, with_legend = panel
        _draw_panel(position, aucs, y_limits, y_ticks, letter, title,
                    with_legend)

    pl.subplots_adjust(left=0.06,
                       bottom=0.11,
                       right=0.98,
                       top=0.91,
                       wspace=0.03,
                       hspace=0.5)

    # The PDF is always written; the raster formats only outside release.
    target = 'Output/Supplementary/FigureS12'
    pl.savefig(os.path.abspath(target + '.pdf'))
    if not release:
        pl.savefig(os.path.abspath(target + '.tiff'), dpi=300)
        pl.savefig(os.path.abspath(target + '.jpg'))
    pl.close()
# Example #3
0
def plotFigure(score_data=None, release=True):
    """Draw the grouped AUC bar chart saved as ``Output/Figures/Figure1``.

    For each of five datasets the AUC of ten predictors, of a logistic
    regression over the FatHMM-W weighting features and of a protein
    majority vote is shown as one bar. A value is stored either in the plain
    array or in a "biased" array of the same tool; the biased arrays are
    drawn hatched at the same position, so each slot shows one visible bar.

    Args:
        score_data: Data container; must offer ``selectDataset``,
            ``getTrueLabels``, ``getScores``, ``getFatHMMFeatures`` and
            ``getUniprotIDs``. The ``None`` default is not usable: the first
            method call on it fails.
        release: If true only the TIFF (300 dpi) is written, otherwise PDF,
            TIFF and JPG are written.
    """
    pl.ion()

    font_size = 13

    rc('font', **{'family': 'sans-serif', 'sans-serif': ['Arial']})

    color_t = ['#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA',
               '#BC8DBF', '#F6989D', '#FFF79A', '#998675', '#A4A4A4',
               '#5AFF00', '#29A3A3']

    # Proxy patches that only serve as legend handles for the hatch styles.
    hatch = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="///")
    circle = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="ooo")

    # load data
    val = utils.Validation()
    mv_predictor = utils.ProteinMajorityVote()
    lr_predictor = utils.LogisticRegression()

    # Datasets on which a tool's AUC is stored in its "biased" array.
    biased_map = {'mutationtaster': ['humvar', 'exovar', 'varibench',
                                     'varibench_selected',
                                     'predictSNP_selected',
                                     'swissvar_selected'],
                  'mutationassessor': ['humvar', 'exovar', 'varibench'],
                  'polyphen2': ['humvar', 'exovar', 'varibench'],
                  'sift': [''],
                  'fathmm_u': [''],
                  'fathmm_w': ['humvar', 'exovar', 'varibench'],
                  'gerp++': [''],
                  'phylop': [''],
                  'CADD': ['']
                  }
    # Datasets whose FatHMM-W / feature values go to the "type 2" arrays.
    type2_datasets = ("varibench_selected", "predictSNP_selected",
                      "swissvar_selected")

    datasets = ['humvar', 'exovar', 'varibench_selected',
                'predictSNP_selected', 'swissvar_selected']
    n_datasets = len(datasets)
    mt_biased = np.zeros(n_datasets)
    mt = np.zeros(n_datasets)
    mass_biased = np.zeros(n_datasets)
    mass = np.zeros(n_datasets)
    pp2_biased = np.zeros(n_datasets)
    pp2 = np.zeros(n_datasets)
    fathmmw = np.zeros(n_datasets)
    fathmmw_biased = np.zeros(n_datasets)
    fathmmw_type2_biased = np.zeros(n_datasets)
    sift = np.zeros(n_datasets)
    lrt = np.zeros(n_datasets)
    fathmmu = np.zeros(n_datasets)
    gerp = np.zeros(n_datasets)
    phylop = np.zeros(n_datasets)
    cadd = np.zeros(n_datasets)
    # mv is never filled below; its all-zero bars only provide the legend
    # handle, the actual values are drawn from mv_biased.
    mv = np.zeros(n_datasets)
    mv_biased = np.zeros(n_datasets)
    features = np.zeros(n_datasets)
    features_biased = np.zeros(n_datasets)
    features_type2_biased = np.zeros(n_datasets)

    def _auc(labels, tool):
        # AUC of one tool on the currently selected dataset.
        return val.getROCStats(labels, score_data.getScores(tool))['auc']

    # Tools whose value goes to either the plain or the biased array.
    split_tools = (('mutationtaster', mt, mt_biased),
                   ('mutationassessor', mass, mass_biased),
                   ('polyphen2', pp2, pp2_biased))
    # Tools that are always stored in their plain array.
    plain_tools = (('fathmm_u', fathmmu), ('sift', sift), ('lrt', lrt),
                   ('gerp++', gerp), ('phylop', phylop), ('CADD', cadd))

    for i, dataset in enumerate(datasets):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()
        print("\tPerforming a Logistic Regression over the weighting features of FatHMM-W for dataset: " + dataset)
        lr_value = lr_predictor.run(true_labels=labels,
                                    features=score_data.getFatHMMFeatures(),
                                    folds=10)['auc']
        print("\tComputing AUC values for dataset: " + dataset)
        for tool, plain, biased in split_tools:
            target = biased if dataset in biased_map[tool] else plain
            target[i] = _auc(labels, tool)

        # FatHMM-W and the feature regression share one three-way split.
        fathmmw_auc = _auc(labels, 'fathmm_w')
        if dataset in biased_map['fathmm_w']:
            fathmmw_biased[i] = fathmmw_auc
            features_biased[i] = lr_value
        elif dataset in type2_datasets:
            fathmmw_type2_biased[i] = fathmmw_auc
            features_type2_biased[i] = lr_value
        else:
            fathmmw[i] = fathmmw_auc
            features[i] = lr_value

        for tool, plain in plain_tools:
            plain[i] = _auc(labels, tool)

        print("\tPerforming a Protein Majority Vote for dataset: " + dataset)
        mv_biased[i] = mv_predictor.getMV4Dataset(
            true_labels=labels,
            proteins=score_data.getUniprotIDs(),
            folds=10)['auc']

    pl.figure(figsize=(15, 5))
    fig1 = pl.subplot(111)

    width = 0.05

    x = np.arange(n_datasets)

    tool_names = np.array(['FatHMM-W', 'MutationTaster-2', 'PolyPhen-2',
                           'MutationAssessor', 'CADD', 'SIFT', 'LRT',
                           'FatHMM-U', 'Gerp++', 'phyloP',
                           'Features ln(Wn), ln(Wd)',
                           'Protein Majority Vote',
                           'Potentially Type 1 Biased',
                           'Potentially Type 2 Biased'])

    # pl.subplot already returns the Axes, so the deprecated
    # Artist.get_axes() round trip is not needed.
    ax = fig1
    for spine in ('top', 'right', 'bottom'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.grid(True)

    # Bar positions that are reused by the hatched overlays further down.
    pos_fathmmw = x-width/2.0-5*width-0.1
    pos_mt = x-width/2.0-4*width-0.08
    pos_pp2 = x-width/2.0-3*width-0.06
    pos_mass = x-width/2.0-2*width-0.04
    pos_features = x+width/2.0+4*width+0.1
    pos_mv = x+width/2.0+5*width+0.12

    # (position, heights, index into color_t), in legend order.
    plain_bars = [
        (pos_fathmmw, fathmmw, 5),
        (pos_mt, mt, 0),
        (pos_pp2, pp2, 1),
        (pos_mass, mass, 8),
        (x-width/2.0-width-0.02, cadd, 9),
        (x-width/2.0, sift, 2),
        (x+width/2.0+0.02, lrt, 3),
        (x+width/2.0+width+0.04, fathmmu, 4),
        (x+width/2.0+2*width+0.06, gerp, 6),
        (x+width/2.0+3*width+0.08, phylop, 7),
        (pos_features, features, 11),
        (pos_mv, mv, 10),
    ]
    handles = [fig1.bar(pos, heights, width=width, color=color_t[color_idx])
               for pos, heights, color_idx in plain_bars]

    #fig1.text(-0.05,1.02,"b",fontsize=15,fontweight="bold",va="top",transform=fig1.transAxes)

    legend_face = "#FFFFFF"
    almost_black = '#262626'
    # The legend is built before the hatched bars are added, from the
    # un-hatched handles plus the two proxy patches.
    legend = ax.legend(handles + [hatch, circle], tool_names, frameon=True,
                       scatterpoints=1, prop={'size': 11}, ncol=7,
                       loc="upper center", fancybox=True,
                       bbox_to_anchor=(0.5, 1.02))
    legend.get_frame().set_alpha(0.5)
    rect = legend.get_frame()
    rect.set_facecolor(legend_face)
    rect.set_linewidth(0.0)
    # Change the legend label colors to almost black, too
    for text in legend.texts:
        text.set_color(almost_black)

    # (position, heights, index into color_t, hatch pattern)
    hatched_bars = [
        (pos_fathmmw, fathmmw_biased, 5, "/o/o/"),
        (pos_fathmmw, fathmmw_type2_biased, 5, "ooo"),
        (pos_mt, mt_biased, 0, "///"),
        (pos_pp2, pp2_biased, 1, "///"),
        (pos_mass, mass_biased, 8, "///"),
        (pos_features, features_biased, 11, "/o/o/"),
        (pos_features, features_type2_biased, 11, "ooo"),
        (pos_mv, mv_biased, 10, "ooo"),
    ]
    for pos, heights, color_idx, pattern in hatched_bars:
        fig1.bar(pos, heights, width=width, color=color_t[color_idx],
                 hatch=pattern)

    pl.xticks(x, ['HumVar', 'ExoVar', 'VariBenchSelected',
                  'predictSNPSelected', 'SwissVarSelected'],
              fontsize=font_size)
    fig1.set_ylabel("AUC")
    fig1.set_ylim(0.5, 1.06)
    fig1.set_xlim(-0.5, n_datasets-0.5)
    pl.yticks([0.45, 0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95,
               1.0])
    pl.subplots_adjust(left=0.06, bottom=0.07, right=0.99, top=0.99,
                       wspace=0.05)

    # The TIFF is always written; PDF and JPG only outside release.
    target = 'Output/Figures/Figure1'
    if release:
        pl.savefig(os.path.abspath(target + '.tiff'), dpi=300)
    else:
        pl.savefig(os.path.abspath(target + '.pdf'))
        pl.savefig(os.path.abspath(target + '.tiff'), dpi=300)
        pl.savefig(os.path.abspath(target + '.jpg'))
    pl.close()
def plotFigure(score_data=None, release=True):
    """Render Figure 4: grouped bar chart of per-tool ROC AUC per dataset.

    For each dataset the AUC of eight tools is read from
    ``utils.Validation().getROCStats(...)['auc']`` and drawn as one bar per
    tool. Each (tool, dataset) value lands in exactly one of three series:

    * ``type1`` - the dataset is listed for the tool in ``biased_map``;
    * ``type2`` - FatHMM-W, Condel+ or Logit+ on one of the ``*_selected``
      datasets (and not already ``type1``);
    * ``plain`` - everything else.

    Plain series are drawn unhatched; the other two are drawn hatched on
    the same x positions. Series slots that do not apply stay at 0.

    Args:
        score_data: object exposing ``selectDataset(name)``,
            ``getTrueLabels()`` and ``getScores(tool)``. The ``None``
            default is kept only for interface compatibility; the methods
            are called unconditionally, so a real object is required.
        release: if true, only ``Figure4.tiff`` (300 dpi) is written;
            otherwise PDF, TIFF (300 dpi) and JPG copies are written.

    Side effects:
        Turns on pyplot interactive mode, overwrites several global
        ``mpl.rcParams`` entries, prints one progress line per dataset and
        writes the image file(s) below ``Output/Figures/``.
    """
    pl.ion()

    font_size = 13
    # Plain item assignment, in the original order (font.size was set twice
    # before; once is enough).
    for key, value in (
            ('font.family', "sans-serif"),
            ('font.sans-serif', "Arial"),
            ('font.size', font_size),
            ('font.weight', 'medium'),
            ('figure.dpi', 150),
            ('figure.facecolor', 'white'),
            ('lines.linewidth', 2),
            ('axes.facecolor', 'white'),
            ('patch.edgecolor', 'black'),
    ):
        mpl.rcParams[key] = value

    color_t = [
        '#F7977A', '#FDC68A', '#A2D39C', '#6ECFF6', '#8493CA', '#BC8DBF',
        '#F6989D', '#FFF79A', '#998675', '#A4A4A4', '#5AFF00', '#29A3A3',
        '#F53DD6', '#F2800D', '#3399FF'
    ]

    # Proxy patches that stand in for the two bias types in the legend.
    hatch = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="///")
    circle = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="ooo")

    val = utils.Validation()

    # Datasets on which a tool is drawn as "Type 1 Biased". Only the tools
    # plotted in this figure are listed.
    type1_datasets = ('humvar', 'exovar', 'varibench')
    biased_map = {
        'mutationassessor': type1_datasets,
        'polyphen2': type1_datasets,
        'logit_condel_old': type1_datasets,
        'condel_old': type1_datasets,
        'fathmm_w': type1_datasets,
        'condel_new': type1_datasets,
        'logit_condel_new': type1_datasets,
        'sift': (),
    }
    # Tools/datasets whose combination is drawn as "Type 2 Biased".
    type2_tools = ('fathmm_w', 'condel_new', 'logit_condel_new')
    type2_datasets = ('varibench_selected', 'predictSNP_selected',
                      'swissvar_selected')

    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]
    n = len(datasets)

    # Same per-dataset evaluation order as before.
    tools = ('mutationassessor', 'polyphen2', 'logit_condel_old',
             'condel_old', 'fathmm_w', 'condel_new', 'logit_condel_new',
             'sift')
    # auc[tool][series][i] holds the AUC of `tool` on datasets[i] in the one
    # series that applies; the other series keep 0 at that index.
    auc = dict((tool, {
        'plain': np.zeros(n),
        'type1': np.zeros(n),
        'type2': np.zeros(n),
    }) for tool in tools)

    for i, dataset in enumerate(datasets):
        score_data.selectDataset(dataset)
        labels = score_data.getTrueLabels()
        # Single-argument parenthesised print behaves the same on Python 2
        # and Python 3.
        print("\tComputing AUC values for dataset: " + dataset)
        for tool in tools:
            if dataset in biased_map[tool]:
                series = 'type1'
            elif tool in type2_tools and dataset in type2_datasets:
                series = 'type2'
            else:
                series = 'plain'
            auc[tool][series][i] = val.getROCStats(
                labels, score_data.getScores(tool))['auc']

    pl.figure(figsize=(10, 5))
    fig1 = pl.subplot(111)
    # pl.subplot() already returns the Axes; the old `fig1.get_axes()` call
    # only handed the same object back and is a deprecated accessor.
    ax = fig1

    width = 0.07
    half = width / 2.0
    x = np.arange(n)

    tool_names = np.array([
        'FatHMM-W', 'PolyPhen-2', 'MutationAssessor', 'SIFT', 'Condel',
        'Logit', 'Condel+', 'Logit+', 'Type 1 Biased', 'Type 2 Biased'
    ])

    for spine in ('top', 'right', 'bottom'):
        ax.spines[spine].set_visible(False)
    ax.xaxis.set_ticks_position('none')
    ax.yaxis.set_ticks_position('left')
    ax.yaxis.grid(True)

    # (tool, bar x positions, index into color_t), in legend order.
    layout = [
        ('fathmm_w', x - half - 3 * width - 0.06, 5),
        ('polyphen2', x - half - 2 * width - 0.04, 1),
        ('mutationassessor', x - half - width - 0.02, 8),
        ('sift', x - half, 2),
        ('condel_old', x + half + 0.02, 11),
        ('logit_condel_old', x + half + width + 0.04, 12),
        ('condel_new', x + half + 2.0 * width + 0.06, 13),
        ('logit_condel_new', x + half + 3.0 * width + 0.08, 14),
    ]

    # Unhatched bars first: their containers are the legend handles.
    handles = [
        fig1.bar(pos, auc[tool]['plain'], width=width, color=color_t[c])
        for tool, pos, c in layout
    ]

    legend = ax.legend(handles + [hatch, circle],
                       tool_names,
                       frameon=True,
                       scatterpoints=1,
                       prop={'size': font_size},
                       ncol=5,
                       loc="upper center",
                       fancybox=True,
                       bbox_to_anchor=(0.5, 1.0))
    frame = legend.get_frame()
    # White, borderless legend box (an earlier light-grey value was
    # overwritten before use and has been dropped).
    frame.set_facecolor("#FFFFFF")
    frame.set_linewidth(0.0)
    for text in legend.texts:
        text.set_color('#262626')

    # Hatched bars reuse the position and colour of the tool's plain bar.
    # Draw order is kept exactly as before.
    slot = dict((tool, (pos, color_t[c])) for tool, pos, c in layout)
    overlays = [
        ('fathmm_w', 'type1', "/o/o/"),
        ('fathmm_w', 'type2', "ooo"),
        ('polyphen2', 'type1', "///"),
        ('mutationassessor', 'type1', "///"),
        ('condel_old', 'type1', "///"),
        ('logit_condel_old', 'type1', "///"),
        ('condel_new', 'type1', "/o/o/"),
        ('logit_condel_new', 'type2', "ooo"),
        ('condel_new', 'type2', "ooo"),
        ('logit_condel_new', 'type1', "/o/o/"),
    ]
    for tool, series, pattern in overlays:
        pos, colour = slot[tool]
        fig1.bar(pos,
                 auc[tool][series],
                 width=width,
                 color=colour,
                 hatch=pattern)

    pl.xticks(x, [
        'HumVar', 'ExoVar', 'VaribenchSelected', 'predictSNPSelected',
        'SwissVarSelected'
    ],
              fontsize=font_size)
    pl.ylabel("AUC")
    fig1.set_ylim(0.5, 1.03)
    fig1.set_xlim(-0.5, n - 0.5)
    # Panel letter in the top-left corner, in axes coordinates.
    fig1.text(-0.05,
              1.02,
              "a",
              fontsize=15,
              fontweight="bold",
              va="top",
              transform=fig1.transAxes)
    pl.yticks([0.5, 0.55, 0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9, 0.95, 1.0])
    pl.subplots_adjust(left=0.07,
                       bottom=0.07,
                       right=0.99,
                       top=0.97,
                       wspace=0.05)

    if release:
        pl.savefig(os.path.abspath('Output/Figures/Figure4.tiff'), dpi=300)
    else:
        pl.savefig(os.path.abspath('Output/Figures/Figure4.pdf'))
        pl.savefig(os.path.abspath('Output/Figures/Figure4.tiff'), dpi=300)
        pl.savefig(os.path.abspath('Output/Figures/Figure4.jpg'))
    pl.close()
def printTableS1(score_data=None):
    """Write Supplementary Table S1 as a semicolon-separated file.

    For every dataset, thirteen rows are written to
    ``Output/Supplementary/tableS1.csv`` (TP, FP, TN, FN, AUC, AUC-PR,
    Accuracy, F-Score, MCC, precision, recall, specificity, NPV), each with
    one ``%.2f``-formatted column per tool. The first row of a dataset
    carries the dataset name in the first column; the following rows leave
    that column empty.

    Args:
        score_data: object exposing ``selectDataset(name)``,
            ``getTrueLabels()``, ``getPredictedLabels(tool)`` and
            ``getScores(tool)``. The ``None`` default is kept only for
            interface compatibility; a real object is required.

    Side effects:
        Creates/truncates the CSV file. The target directory must already
        exist.
    """
    val = utils.Validation()

    # Column order; must match the header line written below.
    tools = ('mutationtaster', 'polyphen2', 'mutationassessor', 'CADD',
             'sift', 'lrt', 'fathmm_u', 'fathmm_w', 'gerp++', 'phylop')

    # (row label, which result the value comes from, key in that result).
    # 'pred' -> getPredictionStats(), 'roc' -> getROCStats().
    rows = (
        ("TP", 'pred', 'TP'),
        ("FP", 'pred', 'FP'),
        ("TN", 'pred', 'TN'),
        ("FN", 'pred', 'FN'),
        ("AUC", 'roc', 'auc'),
        ("AUC-PR", 'roc', 'pr_auc'),
        ("Accuracy", 'pred', 'accuracy'),
        ("F-Score", 'pred', 'f1_score'),
        ("MCC", 'pred', 'mcc'),
        ("Precision/Positive Predictive Value", 'pred', 'precision'),
        ("Recall/Sensitivity", 'pred', 'recall'),
        ("Specificity", 'pred', 'specificity'),
        ("Negative Predictive Value", 'pred', 'npv'),
    )

    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]

    # `with` guarantees the file is closed even if a stats call raises.
    with open(os.path.abspath("Output/Supplementary/tableS1.csv"), 'w') as f:
        f.write(";;MT2;PP2;MASS;CADD;SIFT;LRT;FatHMM-U;FatHMM-W;Gerp++;phyloP\n")
        for dataset in datasets:
            score_data.selectDataset(dataset)
            labels = score_data.getTrueLabels()

            # Evaluate each tool once per dataset instead of once per table
            # cell, and format immediately so nothing depends on the result
            # objects staying untouched by later calls.
            # NOTE(review): assumes both stats calls are deterministic for
            # fixed inputs - confirm against utils.Validation.
            columns = []
            for tool in tools:
                results = {
                    'pred': val.getPredictionStats(
                        labels, score_data.getPredictedLabels(tool)),
                    'roc': val.getROCStats(
                        labels, score_data.getScores(tool)),
                }
                columns.append(
                    ["%.2f" % results[source][key] for _, source, key in rows])

            for r, row in enumerate(rows):
                # Only the first row of a dataset names the dataset.
                lead = dataset if r == 0 else ""
                cells = [column[r] for column in columns]
                f.write(lead + ";" + row[0] + ";" + ";".join(cells) + "\n")
# Example #6
# 0
def plotFigure(score_data=None, release=True):
    """Render Supplementary Figure S11: FatHMM-W AUC per category.

    For each dataset the ROC AUC of ``fathmm_w`` is computed once on the
    full dataset (category ``'all'``) and once for every other entry of
    ``categories`` using ``score_data.getData4Categorie``. Each dataset's
    seven values are passed to ``plotBar`` for one panel of a 3x2 subplot
    grid (subplot codes 321-325).

    Args:
        score_data: object exposing ``selectDataset(name)``,
            ``loadCategories()``, ``getTrueLabels()``, ``getScores(tool)``
            and ``getData4Categorie(category=..., tool_name=...)``. The
            ``None`` default is kept only for interface compatibility; a
            real object is required.
        release: if true, only ``FigureS11.pdf`` is written; otherwise
            PDF, TIFF (300 dpi) and JPG copies are written.

    Side effects:
        Turns on pyplot interactive mode, overwrites several global
        ``mpl.rcParams`` entries and writes the image file(s) below
        ``Output/Supplementary/``.
    """
    pl.ion()

    font_size = 10
    # Plain item assignment, in the original order (font.size was set twice
    # before; once is enough).
    for key, value in (
            ('font.family', "sans-serif"),
            ('font.sans-serif', "Arial"),
            ('font.size', font_size),
            ('figure.dpi', 150),
            ('font.weight', 'medium'),
            ('figure.facecolor', 'white'),
            ('lines.linewidth', 0.8),
            ('axes.facecolor', 'white'),
            ('patch.edgecolor', 'black'),
    ):
        mpl.rcParams[key] = value

    val = utils.Validation()
    categories = ['all', 'pure', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5']

    datasets = [
        'humvar', 'exovar', 'varibench_selected', 'predictSNP_selected',
        'swissvar_selected'
    ]
    # One AUC slot per category for every dataset.
    # NOTE(review): `sp.zeros` is presumably SciPy's alias of numpy.zeros -
    # confirm it is still available in the installed SciPy.
    auc = dict((dataset, sp.zeros(len(categories))) for dataset in datasets)

    for dataset in datasets:
        score_data.selectDataset(dataset)
        score_data.loadCategories()
        for i, cat in enumerate(categories):
            if cat == "all":
                # Whole dataset: plain labels and FatHMM-W scores.
                labels = score_data.getTrueLabels()
                scores = score_data.getScores('fathmm_w')
            else:
                labels, scores = score_data.getData4Categorie(
                    category=cat, tool_name='fathmm_w')
            auc[dataset][i] = val.getROCStats(labels, scores)['auc']

    pl.figure(figsize=(10, 7))

    # NOTE(review): the second positional argument skips 2 (0, 1, 3, 4, 5);
    # kept as is because plotBar's use of it is not visible here.
    plotBar(auc['humvar'], 0, 321, "HumVar", flag=False)
    # The ExoVar panel's return value hosts the shared legend below.
    ax = plotBar(auc['exovar'], 1, 322, "ExoVar", flag=True)
    plotBar(auc['varibench_selected'], 3, 323, "VariBenchSelected",
            flag=False)
    plotBar(auc['predictSNP_selected'], 4, 324, "predictSNPSelected",
            flag=True)
    plotBar(auc['swissvar_selected'], 5, 325, "SwissVarSelected", flag=False)

    # Legend proxy patches. `hatch` and `circle` were used here without a
    # local definition; they are now created the same way as in the other
    # figure routine of this file so the legend cannot hit a NameError.
    rect = pl.Rectangle((0, 0), 1, 1, fill=None)
    hatch = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="///")
    circle = pl.Rectangle((0, 0), 1, 1, fill=None, hatch="ooo")

    leg = ax.legend([rect, hatch, circle],
                    ['FatHMM-W', 'Type 1 Biased', 'Type 2 Biased'],
                    loc='upper center',
                    bbox_to_anchor=(0.0, 1.15),
                    fancybox=True,
                    ncol=5,
                    prop={'size': 10},
                    numpoints=1)
    leg_frame = leg.get_frame()
    leg_frame.set_alpha(0.2)
    leg_frame.set_edgecolor("none")

    pl.subplots_adjust(left=0.065,
                       bottom=0.11,
                       right=0.99,
                       top=0.94,
                       wspace=0.03,
                       hspace=0.5)

    # The PDF is written in both modes; the raster copies only outside
    # release mode.
    pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.pdf'))
    if not release:
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.tiff'),
                   dpi=300)
        pl.savefig(os.path.abspath('Output/Supplementary/FigureS11.jpg'))
    pl.close()
def printTableS2(score_data=None):
	"""Write Supplementary Table S2 to Output/Supplementary/tableS2.csv.

	For each of the five benchmark datasets the table holds nine metric rows
	(AUC, AUC-PR, Accuracy, F1-Score, MCC, Precision, Recall, Negative
	Predictive Value, Specificity) and compares three predictors:

	* FatHMM-W: computed from score_data.getScores('fathmm_w') and
	  score_data.getPredictedLabels('fathmm_w') via utils.Validation.
	* Logistic regression: utils.LogisticRegression().run(...) on
	  score_data.getFatHMMFeatures() with folds=10.
	* Protein majority vote: utils.ProteinMajorityVote().getMV4Dataset(...)
	  on score_data.getUniprotIDs() with folds=10.

	The logistic regression and majority vote columns are written as
	"value (std)", taken from the '<metric>' and '<metric>_std' entries of the
	respective result dictionaries.
	NOTE(review): the '_std' entries are presumably the standard deviation
	over the 10 folds -- confirm against utils.

	score_data -- dataset container; must provide selectDataset,
	              getTrueLabels, getScores, getPredictedLabels,
	              getFatHMMFeatures and getUniprotIDs. The default of None is
	              not usable, a real object has to be passed.

	Returns None. The CSV is ';'-separated; the dataset name appears only in
	the first (AUC) row of each dataset group.
	"""
	val = utils.Validation()
	mv_predictor = utils.ProteinMajorityVote()
	lr_predictor = utils.LogisticRegression()
	datasets = ['humvar','exovar','varibench_selected','predictSNP_selected','swissvar_selected']

	# (row label, key in the lr/mv result dicts) for the rows that take their
	# FatHMM-W value from getPredictionStats; the FatHMM-W key is the same.
	prediction_rows = [("Accuracy",'accuracy'),
			   ("F1-Score",'f1_score'),
			   ("MCC",'mcc'),
			   ("Precision",'precision'),
			   ("Recall",'recall'),
			   ("Negative Predictive Value",'npv'),
			   ("Specificity",'specificity')]

	# 'with' guarantees the file is closed even if a predictor call raises.
	with open(os.path.abspath("Output/Supplementary/tableS2.csv"),'w') as f:
		f.write(";;FatHMM-W;Logistic Regression over the features ln(Wn) & ln(Wd);Protein Majority Vote (MV)\n")
		for dataset in datasets:
			score_data.selectDataset(dataset)
			labels = score_data.getTrueLabels()

			# Single-argument parenthesised print: same output under the
			# Python 2 print statement and the Python 3 print function.
			print("\tTraining Logistic Regression on Features ln(Wn) and ln(Wd)")
			lr = lr_predictor.run(true_labels=labels,features=score_data.getFatHMMFeatures(),folds=10)
			print("\tPerforming a Protein Majority Vote for dataset: " + dataset)
			mv = mv_predictor.getMV4Dataset(true_labels=labels,proteins=score_data.getUniprotIDs(),folds=10)

			# Compute the FatHMM-W statistics once per dataset instead of once
			# per table row (assumes the Validation helpers are deterministic
			# for identical inputs).
			roc_stats = val.getROCStats(labels,score_data.getScores('fathmm_w'))
			pred_stats = val.getPredictionStats(labels,score_data.getPredictedLabels('fathmm_w'))

			# Note the asymmetric key names: FatHMM-W uses 'pr_auc', while the
			# lr/mv result dicts use 'auc_pr'.
			rows = [("AUC",roc_stats['auc'],'auc'),
				("AUC-PR",roc_stats['pr_auc'],'auc_pr')]
			rows.extend((label,pred_stats[key],key) for label,key in prediction_rows)

			# Only the first row of a dataset group carries the dataset name.
			first_column = dataset
			for label,fathmm_value,key in rows:
				string = first_column + ";" + label + ";"
				string += "%.2f"% fathmm_value + ";"
				string += "%.2f (%.2f)"% (lr[key],lr[key + '_std']) + ";"
				string += "%.2f (%.2f)"% (mv[key],mv[key + '_std'])
				f.write(string + "\n")
				first_column = ""
示例#8
0
    datas = utils.get_datas(PREPROCESSING_TRAIN_DATA_DIR)

datas_y = preprocessing_to_train.select_data_y(datas)

train_x, train_y, val_x, val_y = preprocessing_to_train.classification_data(
    data_x=datas, data_y=datas_y)

with tf.Session() as sess:

    inputs, labels, keep_prob = utils.build_input()

    predictions, cost, optimizer, initial_state, final_state, cell = LSTM.LSTM_cell(
        LSTM_SIZE, keep_prob, LSTM_LAYERS, BATCH_SIZE, train_x, labels,
        LEARNING_RATE)

    accuracy = utils.Validation(predictions, labels)

    utils.draw_scalar(cost, 'loss')

    utils.draw_scalar(accuracy, 'Batch accurcy')

    merged = tf.summary.merge_all()

    writer = tf.summary.FileWriter('logs/', sess.graph)

    sess.run(tf.global_variables_initializer())

    with graph.as_default():
        saver = tf.train.Saver()

    iteration = 1