Example #1
def Sashimiplottting(bamdir, countsin, inputpsi, genelis):
    inputpsi = unique.filepath(inputpsi)
    text_file = open(inputpsi, 'rU')
    lines = text_file.readlines()

    text_file.close()
    samp = sample(inputpsi)
    gene_label, gene_sym = genelist(inputpsi)

    header = True
    junction_max = []
    countsin = unique.filepath(countsin)
    for line in open(countsin, 'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data, '\t')
        if header:
            samples = t[1:]
            header = False
            exon_sum_array = [0] * len(samples)
            count_sum_array = [0] * len(samples)
        else:
            values = map(float, t[1:])
            count_sum_array = [
                sum(value) for value in zip(*[count_sum_array, values])
            ]

        for i in range(len(samp)):
            sample_read[samp[i]] = count_sum_array[i]
        #print samp[i],sample_read[samp[i]]

    genelis = unique.filepath(genelis)

    sashmi_plot_list(bamdir, genelis, gene_label, lines, samp, gene_sym)
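
The per-sample totals above are accumulated by pairwise-summing each new row of the counts file into count_sum_array (sample_read is assumed to be a module-level dictionary in the original code). A minimal, self-contained sketch of that accumulation pattern with made-up numbers:

count_sum_array = [0, 0, 0]
for values in ([2.0, 1.0, 0.0], [3.0, 4.0, 5.0]):
    count_sum_array = [sum(pair) for pair in zip(count_sum_array, values)]
print(count_sum_array)  # [5.0, 5.0, 5.0]
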
Example #2
def retreiveAllKnownSpliceSites():
    ### Uses a priori strand information when none present
    import export, unique
    chromosomes_found={}
    parent_dir = export.findParentDir(bam_file)
    species = None
    for file in os.listdir(parent_dir):
        if 'AltAnalyze_report' in file and '.log' in file:
            log_file = unique.filepath(parent_dir+'/'+file)
            log_contents = open(log_file, "rU")
            species_tag = '\tspecies: '
            for line in log_contents:
                line = line.rstrip()
                if species_tag in line:
                    species = string.split(line,species_tag)[1]
    if species == None:
        species = IndicatedSpecies

    splicesite_db={}
    refExonCoordinateFile = unique.filepath('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt')
    firstLine=True
    for line in open(refExonCoordinateFile,'rU').xreadlines():
        if firstLine: firstLine=False
        else:
            line = line.rstrip('\n')
            t = string.split(line,'\t'); #'gene', 'exon-id', 'chromosome', 'strand', 'exon-region-start(s)', 'exon-region-stop(s)', 'constitutive_call', 'ens_exon_ids', 'splice_events', 'splice_junctions'
            geneID, exon, chr, strand, start, stop = t[:6]
            #start = int(start); stop = int(stop)
            #geneID = string.split(exon,':')[0]
            splicesite_db[chr,start]=strand
            splicesite_db[chr,stop]=strand
            if len(chr)<5 or ('GL0' not in chr and 'GL' not in chr and 'JH' not in chr and 'MG' not in chr):
                chromosomes_found[string.replace(chr,'chr','')] = []
    
    return splicesite_db,chromosomes_found
Example #3
def Sashimiplottting(bamdir,countsin,PSIFilename,eventsToVisualizeFilename,events=None):
    PSIFilename = unique.filepath(PSIFilename)
    header=True
    junction_max=[]
    countsin = unique.filepath(countsin)
    count_sum_array=[]
    count=0
    for line in open(countsin,'rU').xreadlines():
	data = cleanUpLine(line)
	t = string.split(data,'\t')
	if header:
	    samples = []
	    for s in t[1:]:
		if '.bed' not in s: s+='.bed'
		samples.append(s)
	    header=False
	    count_sum_array=[0]*len(samples)
	else:
	    values = map(float,t[1:])
	    count_sum_array = [sum(value) for value in zip(*[count_sum_array,values])]
	    count+=1
	    if count >30000 and 'salomonis' in bamdir: break

    index=0
    for sample in samples:
        count_sum_array_db[sample] = count_sum_array[index]
        index+=1

    if events==None:
        #print 'Preparing Sashimi-Input:',eventsToVisualizeFilename
        eventsToVisualizeFilename = unique.filepath(eventsToVisualizeFilename)

    gene_to_symbol=sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=events)
    return gene_to_symbol
Example #4
def retreiveAllKnownSpliceSites():
    ### Uses a priori strand information when none present
    import export, unique
    chromosomes_found={}
    parent_dir = export.findParentDir(bam_file)
    for file in os.listdir(parent_dir):
        if 'AltAnalyze_report' in file and '.log' in file:
            log_file = unique.filepath(parent_dir+'/'+file)
            log_contents = open(log_file, "rU")
            species_tag = '\tspecies: '
            for line in log_contents:
                line = line.rstrip()
                if species_tag in line:
                    species = string.split(line,species_tag)[1]
    splicesite_db={}
    refExonCoordinateFile = unique.filepath('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt')
    firstLine=True
    for line in open(refExonCoordinateFile,'rU').xreadlines():
        if firstLine: firstLine=False
        else:
            line = line.rstrip('\n')
            t = string.split(line,'\t'); #'gene', 'exon-id', 'chromosome', 'strand', 'exon-region-start(s)', 'exon-region-stop(s)', 'constitutive_call', 'ens_exon_ids', 'splice_events', 'splice_junctions'
            geneID, exon, chr, strand, start, stop = t[:6]
            #start = int(start); stop = int(stop)
            #geneID = string.split(exon,':')[0]
            splicesite_db[chr,start]=strand
            splicesite_db[chr,stop]=strand
            if len(chr)<5 or ('GL0' not in chr and 'GL' not in chr and 'JH' not in chr and 'MG' not in chr):
                chromosomes_found[string.replace(chr,'chr','')] = []
    
    return splicesite_db,chromosomes_found
Example #5
def Sashimiplottting(bamdir,countsin,inputpsi,genelis):
    inputpsi = unique.filepath(inputpsi)
    text_file = open(inputpsi,'rU')
    lines = text_file.readlines()
   
    text_file.close()
    samp=sample(inputpsi)
    gene_label,gene_sym=genelist(inputpsi)

    header=True
    junction_max=[]
    countsin = unique.filepath(countsin)

    for line in open(countsin,'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data,'\t')
        if header:
            samples = t[1:]
            header=False
            exon_sum_array=[0]*len(samples)
            count_sum_array=[0]*len(samples)
        else:
            values = map(float,t[1:])
            count_sum_array = [sum(value) for value in zip(*[count_sum_array,values])]

        for i in range(len(samp)):
            sample_read[samp[i]] = count_sum_array[i]
            #print samp[i],sample_read[samp[i]]

    genelis = unique.filepath(genelis)

    sashmi_plot_list(bamdir,genelis,gene_label,lines,samp,gene_sym)
Example #6
def retreiveAllKnownSpliceSites(returnExonRetention=False,DesignatedSpecies=None,path=None):
    ### Uses a priori strand information when none present
    import export, unique
    chromosomes_found={}
    try: parent_dir = export.findParentDir(bam_file)
    except Exception: parent_dir = export.findParentDir(path)
    species = None
    for file in os.listdir(parent_dir):
        if 'AltAnalyze_report' in file and '.log' in file:
            log_file = unique.filepath(parent_dir+'/'+file)
            log_contents = open(log_file, "rU")
            species_tag = '\tspecies: '
            for line in log_contents:
                line = line.rstrip()
                if species_tag in line:
                    species = string.split(line,species_tag)[1]
    if species == None:
        try: species = IndicatedSpecies
        except Exception: species = DesignatedSpecies
    
    splicesite_db={}
    gene_coord_db={}
    try:
        if ExonReference==None:
            exon_dir = 'AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt'
            length = verifyFileLength(exon_dir)
    except Exception:
        #print traceback.format_exc();sys.exit()
        length = 0
    if length==0:
        exon_dir = ExonReference
    refExonCoordinateFile = unique.filepath(exon_dir)
    firstLine=True
    for line in open(refExonCoordinateFile,'rU').xreadlines():
        if firstLine: firstLine=False
        else:
            line = line.rstrip('\n')
            t = string.split(line,'\t'); #'gene', 'exon-id', 'chromosome', 'strand', 'exon-region-start(s)', 'exon-region-stop(s)', 'constitutive_call', 'ens_exon_ids', 'splice_events', 'splice_junctions'
            geneID, exon, chr, strand, start, stop = t[:6]
            spliceEvent = t[-2]
            #start = int(start); stop = int(stop)
            #geneID = string.split(exon,':')[0]
            try:
                gene_coord_db[geneID,chr].append(int(start))
                gene_coord_db[geneID,chr].append(int(stop))
            except Exception:
                gene_coord_db[geneID,chr] = [int(start)]
                gene_coord_db[geneID,chr].append(int(stop))
            if returnExonRetention:
                if 'exclusion' in spliceEvent:
                    splicesite_db[geneID+':'+exon]=[]
            else:
                splicesite_db[chr,start]=strand
                splicesite_db[chr,stop]=strand
                if len(chr)<5 or ('GL0' not in chr and 'GL' not in chr and 'JH' not in chr and 'MG' not in chr):
                    chromosomes_found[string.replace(chr,'chr','')] = []
    for i in gene_coord_db:
        gene_coord_db[i].sort()
        gene_coord_db[i] = [gene_coord_db[i][0],gene_coord_db[i][-1]]
    return splicesite_db,chromosomes_found,gene_coord_db
Example #7
def Sashimiplottting(bamdir,countsin,PSIFilename,eventsToVisualizeFilename,events=None):
    PSIFilename = unique.filepath(PSIFilename)
    header=True
    junction_max=[]
    countsin = unique.filepath(countsin)
    count_sum_array=[]
    count=0
    for line in open(countsin,'rU').xreadlines():
	data = cleanUpLine(line)
	t = string.split(data,'\t')
	if header:
	    samples = []
	    for s in t[1:]:
		if '.bed' not in s: s+='.bed'
		samples.append(s)
	    header=False
	    count_sum_array=[0]*len(samples)
	else:
	    values = map(float,t[1:])
	    count_sum_array = [sum(value) for value in zip(*[count_sum_array,values])]
	    count+=1
	    if count >30000 and 'salomonis' in bamdir: break

    index=0
    for sample in samples:
        count_sum_array_db[sample] = count_sum_array[index]
        index+=1

    if events==None:
        #print 'Preparing Sashimi-Input:',eventsToVisualizeFilename
        eventsToVisualizeFilename = unique.filepath(eventsToVisualizeFilename)

    gene_to_symbol=sashmi_plot_list(bamdir,eventsToVisualizeFilename,PSIFilename,events=events)
    return gene_to_symbol
Example #8
def verifyFile(filename):
    status = False
    try:
        fn=unique.filepath(filename)
        for line in open(fn,'rU').xreadlines(): status = True;break
    except Exception: status = False
    return status
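
verifyFile only reports whether the named file can be opened and contains at least one line. A rough standalone equivalent without the AltAnalyze unique module (the function name here is hypothetical):

def verify_file_standalone(filename):
    # True only if the path can be opened and has at least one line
    try:
        with open(filename, 'r') as handle:
            for _line in handle:
                return True
    except (IOError, OSError):
        pass
    return False
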
Example #9
def importIsoformAnnotations(species,platform,psievents,annotType=None,junctionPairFeatures={},dataType='reciprocal'):
    count=0
    if annotType == 'domain':
        if dataType == 'reciprocal':
            fn = 'AltDatabase/'+species+'/'+platform+'/'+'probeset-domain-annotations-exoncomp.txt'
        else:
            fn = 'AltDatabase/'+species+'/'+platform+'/junction/'+'probeset-domain-annotations-exoncomp.txt'
    else:
        if dataType == 'reciprocal':
            fn = 'AltDatabase/'+species+'/'+platform+'/'+'probeset-protein-annotations-exoncomp.txt'
        else:
            fn = 'AltDatabase/'+species+'/'+platform+'/junction/'+'probeset-protein-annotations-exoncomp.txt'
    fn = unique.filepath(fn)
    for line in open(fn,'rU'):
        line = line.rstrip('\n')
        values = string.split(line,'\t')
        junctions = string.split(values[0],'|')
        features = formatFeatures(values[1:])
        antiFeatures = inverseFeatureDirections(features)
        if tuple(junctions) in psievents:
            try: junctionPairFeatures[tuple(junctions)].append(string.join(features,', '))
            except Exception: junctionPairFeatures[tuple(junctions)] = [string.join(features,', ')]
        if dataType == 'reciprocal':
            junctions.reverse()
            if tuple(junctions) in psievents:
                try: junctionPairFeatures[tuple(junctions)].append(string.join(antiFeatures,', '))
                except Exception: junctionPairFeatures[tuple(junctions)] = [string.join(antiFeatures,', ')]
        count+=1
    print count, 'protein predictions added'
    return junctionPairFeatures
Example #10
def importDatabaseEventAnnotations(species,platform):
    terminal_exons={}
    header=True
    count=0
    fn = 'AltDatabase/'+species+'/'+platform+'/'+species+'_Ensembl_exons.txt'
    fn = unique.filepath(fn)
    for line in open(fn,'rU'):
        line = line.rstrip('\n')
        values = string.split(line,'\t')
        if header:
            eI = values.index('splice_events')
            header=False
            continue
        
        exon = values[0]
        event = values[eI]
        if 'alt-N-term' in event or 'altPromoter' in event:
            if 'cassette' not in event:
                terminal_exons[exon] = 'altPromoter'
                count+=1    
        elif 'alt-C-term' in event:
            if 'cassette' not in event:
                terminal_exons[exon] = 'alt-C-term'
                count+=1
        """
        elif 'bleedingExon' in event or 'altFinish' in event:
            terminal_exons[exon] = 'bleedingExon'
            count+=1"""
    print count, 'terminal exon annotations stored'
    return terminal_exons
Example #11
def indexdic(fname):
    fname = unique.filepath(fname)
    head=0
    for line in open(fname, 'rU').xreadlines():
        #for k in range(len(a['AltAnalyze_ID'])):
        if head == 0:
            head = 1
            continue
        else:
            a = string.split(line, '\t')
            #p=a['AltAnalyze_ID'][k]
            p = a[0]
            j = string.split(p, ':')
            #print j[0]
            for i in range(len(j)):
                if "ENS" in j[i]:
                    if '-' in j[i]:
                        ji = string.split(j[i], '-')
                        jj = ji[1]
                    else:
                        jj = j[i]
                    #print jj,'first check'
                    if jj in index_read:
                        index_read[jj].append(p)
                    else:
                        index_read[jj] = [p]
    return index_read
Example #12
def importExpressionValues(filename):
    """ Imports tab-delimited expression values"""

    header = True
    sample_expression_db = {}
    fn = unique.filepath(filename)
    for line in open(fn, "rU").xreadlines():
        data = UI.cleanUpLine(line)
        if header:
            sample_names = string.split(data, "\t")
            header = False
        else:
            exp_values = string.split(data, "\t")
            gene = exp_values[0]
            index = 1
            for value in exp_values[1:]:
                sample_name = sample_names[index]
                if sample_name in sample_expression_db:
                    gene_expression_db = sample_expression_db[sample_name]
                    gene_expression_db[gene] = value
                else:
                    gene_expression_db = {}
                    gene_expression_db[gene] = value
                    sample_expression_db[sample_name] = gene_expression_db
                index += 1
    return sample_expression_db
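
importExpressionValues expects a plain tab-delimited matrix with a sample-name header row and one gene per data row, and builds a nested dictionary keyed first by sample and then by gene. A small illustration of that layout with made-up values (no AltAnalyze dependencies):

matrix = (
    "UID\tSample1\tSample2\n"
    "GeneA\t5.1\t6.3\n"
    "GeneB\t0.0\t2.2\n"
)
sample_expression_db = {}
header = True
for line in matrix.splitlines():
    fields = line.split('\t')
    if header:
        sample_names = fields
        header = False
        continue
    gene = fields[0]
    for index, value in enumerate(fields[1:], 1):
        sample_expression_db.setdefault(sample_names[index], {})[gene] = value
print(sample_expression_db['Sample1']['GeneA'])  # prints: 5.1
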
Example #13
def extractFeatures(species,countinp):
    import export
    ExonsPresent=False
    if 'counts.' in countinp:
	feature_file = string.replace(countinp,'counts.','features.')
	fe = export.ExportFile(feature_file)
	firstLine = True
	for line in open(countinp,'rU').xreadlines():
	    if firstLine: firstLine=False
	    else:
		feature_info = string.split(line,'\t')[0]
		fe.write(feature_info+'\n')
		if ExonsPresent == False:
		    exon = string.split(feature_info,'=')[0]
		    if '-' not in exon:
			ExonsPresent = True
			
	### Add exon-info if necessary
	if ExonsPresent == False:
	    exons_file = unique.filepath('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt')
	    firstLine = True
	    for line in open(exons_file,'rU').xreadlines():
		if firstLine: firstLine=False
		else:
		    line = line.rstrip('\n')
		    t = string.split(line,'\t')
		    gene = t[0]
		    exon = t[1]
		    chr = t[2]
		    strand = t[3]
		    start = t[4]
		    end = t[5]
		    fe.write(gene+':'+exon+'='+chr+':'+start+'-'+end+'\n')
	fe.close()
    return feature_file	
Example #14
def importPSIJunctions(fname):
    fname = unique.filepath(fname)
    header=True
    for line in open(fname,'rU').xreadlines():
        line = line.rstrip(os.linesep)
	if header: header = False
	else:
	    t=string.split(line,'\t')
	    try:
		### Re-order these to have the exclusion be listed first
		j1a,j1b = string.split(t[2],'-')
		j2a,j2b = string.split(t[3],'-')
		j1a = string.split(j1a,':')[1]
		j2a = string.split(j2a,':')[1]
		j1a = int(float(string.split(j1a,'.')[0][1:]))
		j1b = int(float(string.split(j1b,'.')[0][1:]))
		j2a = int(float(string.split(j2a,'.')[0][1:]))
		j2b = int(float(string.split(j2b,'.')[0][1:]))
		#print [j1a,j2a,j1b,j2b], t[2], t[3]
		if j1a>j2a or j1b<j2b:
		    val = t[2]+' '+t[3]
		else:
		    val=t[3]+' '+t[2]
	    except Exception:
		#print traceback.format_exc();sys.exit()
		val=t[2]+' '+t[3]
	    if '-' not in t[2]:
		val = t[3]+' '+t[2]
	    val = string.replace(val,":","__")
	    lis.append(val)
	    #print t[0]
    return lis
Example #15
def importAgilentExpressionValues(filename,array,channel_to_extract):
    """ Imports Agilent Feature Extraction files for one or more channels """
    print '.',
    red_expr_db={}
    green_expr_db={}
    parse=False
    fn=unique.filepath(filename)
    for line in open(fn,'rU').xreadlines():
        data = UI.cleanUpLine(line)
        if parse==False:
            if 'ProbeName' in data:
                headers = string.split(data,'\t')
                pn = headers.index('ProbeName')
                try: gc = headers.index('gProcessedSignal')
                except Exception: pass
                try: rc = headers.index('rProcessedSignal')
                except Exception: pass
                parse = True
        else:
            t = string.split(data,'\t')
            probe_name = t[pn]
            try: green_channel = math.log(float(t[gc])+1,2) #min is 0
            except Exception: pass
            try: red_channel = math.log(float(t[rc])+1,2) #min is 0
            except Exception: pass
            if 'red' in channel_to_extract:
                red_expr_db[probe_name] = red_channel
            if 'green' in channel_to_extract:
                green_expr_db[probe_name] = green_channel

    if 'red' in channel_to_extract:
        red_channel_db[array] = red_expr_db
    if 'green' in channel_to_extract:
        green_channel_db[array] = green_expr_db
Example #16
def genelist(fname):
    fname = unique.filepath(fname)
    header=True
    for line in open(fname,'rU').xreadlines():
        line = line.rstrip(os.linesep)
	if header: header = False
	else:
	    t=string.split(line,'\t')
	    try:
		### Re-order these to have the exclusion be listed first
		j1a,j1b = string.split(t[2],'-')
		j2a,j2b = string.split(t[3],'-')
		j1a = string.split(j1a,':')[1]
		j2a = string.split(j2a,':')[1]
		j1a = int(float(string.split(j1a,'.')[0][1:]))
		j1b = int(float(string.split(j1b,'.')[0][1:]))
		j2a = int(float(string.split(j2a,'.')[0][1:]))
		j2b = int(float(string.split(j2b,'.')[0][1:]))
		#print [j1a,j2a,j1b,j2b], t[2], t[3]
		if j1a>j2a or j1b<j2b:
		    val = t[2]+' '+t[3]
		else:
		    val=t[3]+' '+t[2]
	    except Exception:
		#print traceback.format_exc();sys.exit()
		val=t[2]+' '+t[3]
	    if '-' not in t[2]:
		val = t[3]+' '+t[2]
	    val = string.replace(val,":","__")
	    lis.append(val)
	    #print t[0]
    return lis
Example #17
def importPSIJunctions(fname):
    All_PSI_Reciprocol_Junctions=[]
    fname = unique.filepath(fname)
    header=True
    for line in open(fname,'rU').xreadlines():
        line = line.rstrip(os.linesep)
	if header: header = False
	else:
	    t=string.split(line,'\t')
	    junction1 = t[2]
	    junction2 = t[3]
	    try:
		### Re-order these to have the exclusion be listed first
		j1a,j1b = string.split(t[2],'-')
		j2a,j2b = string.split(t[3],'-')
		j1a = string.split(j1a,':')[1]
		j2a = string.split(j2a,':')[1]
		j1a = int(float(string.split(j1a,'.')[0][1:]))
		j1b = int(float(string.split(j1b,'.')[0][1:]))
		j2a = int(float(string.split(j2a,'.')[0][1:]))
		j2b = int(float(string.split(j2b,'.')[0][1:]))
		#print [j1a,j2a,j1b,j2b], t[2], t[3]
		event1 = string.replace(junction1,":","__") ### first listed junction
		event2 = string.replace(junction2,":","__") ### second listed junction
		if j1a>j2a or j1b<j2b:
		    event_pair = event1,event2
		else:
		    event_pair=event2,event1
	    except Exception:
		#print traceback.format_exc();sys.exit()
		event_pair=event1,event2
	    if '-' not in event1:
		event_pair = event2,event1
	    All_PSI_Reciprocol_Junctions.append(event_pair)
    return All_PSI_Reciprocol_Junctions
Example #18
def importCircularRNAEvents(folder, circ_p):
    dataset_events = {}
    files = unique.read_directory(folder)
    for file in files:
        if 'circRNA.' in file and '.txt' in file:
            events = []
            dataset = file[:-4]
            fn = unique.filepath(folder + '/' + file)
            firstRow = True
            for line in open(fn, 'rU').xreadlines():
                data = cleanUpLine(line)
                t = string.split(data, '\t')
                if firstRow:
                    index = 0
                    """ Standard Fields from MultiPath-PSI """
                    for i in t:
                        if 'PValue' == i:
                            pv = index
                        if 'logFC' == i:
                            lf = index
                        index += 1
                    firstRow = False
                else:
                    id = t[0]
                    pval = float(t[pv])
                    logFC = float(t[lf])
                    ci = circInformation(id, pval, logFC)
                    if pval < circ_p:
                        events.append(ci)
            dataset_events[dataset] = events
    return dataset_events
Example #19
def reimportFeatures(featureFile):
    """ Import the exon and gene coordinates """
    gene_event_db = {}
    featureFile = unique.filepath(featureFile)
    head = 0
    for line in open(featureFile, 'rU').xreadlines():
        #for k in range(len(strand['AltAnalyze_ID'])):
        if head == 0: head = 1
        else:
            line = line.rstrip('\n')
            event = string.split(
                line, '\t'
            )[0]  #example event: ENSMUSG00000025915:E17.2-E17.5=chr1:9885753-9886047
            event = string.replace(event, ':', '__')
            event_split = string.split(event, '__')
            for i in range(len(event_split)):
                if "ENS" in event_split[i] or '00000' in event_split[i]:
                    if '-' in event_split[i]:
                        ji = string.split(event_split[i], '-')
                        gene = ji[1]
                    else:
                        gene = event_split[i]
                    featureID, position = string.split(
                        event, '='
                    )  ### store the feature (exon or junction) position and ID separately
                    pd = PositionData(position)
                    if gene in gene_event_db:
                        feature_db = gene_event_db[gene]
                        feature_db[featureID] = pd
                    else:
                        feature_db = {featureID: pd}
                        gene_event_db[gene] = feature_db
    return gene_event_db
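
The feature lines parsed above follow the format shown in the inline comment: a feature ID and a coordinate string joined by '=', with ':' rewritten to '__' before splitting. A standalone illustration using that same example event:

event = 'ENSMUSG00000025915:E17.2-E17.5=chr1:9885753-9886047'
event = event.replace(':', '__')
featureID, position = event.split('=')
print(featureID)  # ENSMUSG00000025915__E17.2-E17.5
print(position)   # chr1__9885753-9886047
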
Example #20
def importExpressionValues(filename):
    """ Imports tab-delimited expression values"""

    header = True
    sample_expression_db={}
    fn=unique.filepath(filename)
    for line in open(fn,'rU').xreadlines():
        data = UI.cleanUpLine(line)
        if header:
            sample_names = string.split(data,'\t')
            header = False
        else:
            exp_values = string.split(data,'\t')
            gene = exp_values[0]
            index=1
            for value in exp_values[1:]:
                sample_name = sample_names[index]
                if sample_name in sample_expression_db:
                    gene_expression_db = sample_expression_db[sample_name]
                    gene_expression_db[gene] = value
                else:
                    gene_expression_db={}
                    gene_expression_db[gene] = value
                    sample_expression_db[sample_name] = gene_expression_db
                index+=1
    return sample_expression_db
Example #21
def reimportFeatures(featureFile):
    """ Import the exon and gene coordinates """
    gene_event_db={}
    featureFile = unique.filepath(featureFile)
    head=0
    for line in open(featureFile, 'rU').xreadlines():
        #for k in range(len(strand['AltAnalyze_ID'])):
        if head == 0: head = 1
        else:
            line = line.rstrip('\n')
            event = string.split(line, '\t')[0]  #example event: ENSMUSG00000025915:E17.2-E17.5=chr1:9885753-9886047
            event = string.replace(event, ':', '__')
            event_split = string.split(event, '__')
            for i in range(len(event_split)):
                if "ENS" in event_split[i] or '00000' in event_split[i]:
                    if '-' in event_split[i]:
                        ji = string.split(event_split[i], '-')
                        gene = ji[1]
                    else:
                        gene = event_split[i]
                    featureID, position = string.split(event, '=')  ### store the feature (exon or junction) position and ID separately
                    pd = PositionData(position)
                    if gene in gene_event_db:
                        feature_db = gene_event_db[gene]
                        feature_db[featureID] = pd
                    else:
                        feature_db = {featureID: pd}
                        gene_event_db[gene] = feature_db
    return gene_event_db
Example #22
def genelist(fname):
    fname = unique.filepath(fname)
    for line in open(fname,'rU').xreadlines():
        line = line.rstrip(os.linesep)
        t=string.split(line,'\t')
        val=t[2]+' '+t[3]
        lis.append(val)
        #print t[0]
    return lis
Example #23
def verifyFileLength(filename):
    count = 0
    try:
        fn=unique.filepath(filename)
        for line in open(fn,'rU').xreadlines():
            count+=1
            if count>9: break
    except Exception: null=[]
    return count
Example #24
def extractFeatures(species,countsFileDir):
    import export
    ExonsPresent=False
    lastgene = None
    lastend = None
    genes_detected={}
    count=0
    first_last_exons = {} ### Make a strand-aware fake junction comprised of the first and last exon
    if 'counts.' in countsFileDir:
	### The feature_file contains only ExonID or Gene IDs and associated coordinates
	feature_file = string.replace(countsFileDir,'counts.','features.')
	fe = export.ExportFile(feature_file)
	firstLine = True
	for line in open(countsFileDir,'rU').xreadlines():
	    if firstLine: firstLine=False
	    else:
		feature_info = string.split(line,'\t')[0]
		fe.write(feature_info+'\n')
		junction_annotation = string.split(feature_info,'=')[0]
		if '-' in junction_annotation:
		    geneid = string.split(junction_annotation,':')[0]
		    genes_detected[geneid]=[]
		if ExonsPresent == False:
		    exon = string.split(feature_info,'=')[0]
		    if '-' not in exon:
			ExonsPresent = True
			
	### Add exon-info if necessary
	exons_file = unique.filepath('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt')
        firstLine = True
	for line in open(exons_file,'rU').xreadlines():
	    if firstLine: firstLine=False
	    else:
		line = line.rstrip('\n')
		t = string.split(line,'\t')
		gene,exon,chr,strand,start,end = t[:6]
		if gene!=lastgene:
		    if len(genes_detected)==0 or gene in genes_detected: ### restrict to detected genes
			first_last_exons[gene,strand] = [(chr,start)]
		    if len(genes_detected)==0 or lastgene in genes_detected: ### restrict to detected genes
			try: first_last_exons[lastgene,laststrand].append(lastend)
			except Exception:
			    pass ### occurs for the first gene	
		if ExonsPresent == False:
		    fe.write(gene+':'+exon+'='+chr+':'+start+'-'+end+'\n')
		lastgene = gene; lastend = end; laststrand = strand
	if len(genes_detected)==0 or lastgene in genes_detected:
	    first_last_exons[lastgene,laststrand].append(lastend)
	
	### Add strand fake junction for the whole gene
	for (gene,strand) in first_last_exons:
	    (chr,start),end = first_last_exons[gene,strand]
	    if strand == '-':
		start,end = end,start # Need to encode strand in this annotation, do this by strand orienting the positions
	    fe.write(gene+':E1.1-E100.1'+'='+chr+':'+start+'-'+end+'\n')
	fe.close()
    return feature_file	### return the location of the exon and gene coordinates file
Example #25
def filepath(filename):
    try:
        import unique ### local to AltAnalyze
        fn = unique.filepath(filename)
    except Exception:
        ### Should work fine when run as a script with this (AltAnalyze code is specific for packaging with AltAnalyze)
        dir=os.path.dirname(dirfile.__file__)
        try: dir_list = os.listdir(filename); fn = filename ### test to see if the path can be found (then it is the full path)
        except Exception: fn=os.path.join(dir,filename)
    return fn
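
Outside of AltAnalyze, where dirfile refers to a packaged module, the same fallback idea can be written with only the standard library: use the path as given if it resolves, otherwise treat it as relative to a known anchor file. A hedged sketch (resolve_path and anchor_file are illustrative names, not part of AltAnalyze):

import os

def resolve_path(filename, anchor_file):
    # Use the path as-is if it already exists; otherwise assume it is
    # relative to the directory containing anchor_file (e.g. __file__).
    if os.path.exists(filename):
        return filename
    return os.path.join(os.path.dirname(os.path.abspath(anchor_file)), filename)
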
Example #26
def filepath(filename):
    try:
        import unique ### local to AltAnalyze
        fn = unique.filepath(filename)
    except Exception:
        ### Should work fine when run as a script with this (AltAnalyze code is specific for packaging with AltAnalyze)
        dir=os.path.dirname(dirfile.__file__)
        try: dir_list = os.listdir(filename); fn = filename ### test to see if the path can be found (then it is the full path)
        except Exception: fn=os.path.join(dir,filename)
    return fn
Example #27
def extractFeatures(species,countinp):
    import export
    ExonsPresent=False
    lastgene = None
    lastend = None
    genes_detected={}
    count=0
    first_last_exons = {} ### Make a fake junction comprised of the first and last exon
    if 'counts.' in countinp:
	feature_file = string.replace(countinp,'counts.','features.')
	fe = export.ExportFile(feature_file)
	firstLine = True
	for line in open(countinp,'rU').xreadlines():
	    if firstLine: firstLine=False
	    else:
		feature_info = string.split(line,'\t')[0]
		fe.write(feature_info+'\n')
		junction_annotation = string.split(feature_info,'=')[0]
		if '-' in junction_annotation:
		    geneid = string.split(junction_annotation,':')[0]
		    genes_detected[geneid]=[]
		if ExonsPresent == False:
		    exon = string.split(feature_info,'=')[0]
		    if '-' not in exon:
			ExonsPresent = True
			
	### Add exon-info if necessary

	exons_file = unique.filepath('AltDatabase/ensembl/'+species+'/'+species+'_Ensembl_exon.txt')
        firstLine = True
	for line in open(exons_file,'rU').xreadlines():
	    if firstLine: firstLine=False
	    else:
		line = line.rstrip('\n')
		t = string.split(line,'\t')
		gene,exon,chr,strand,start,end = t[:6]
		if gene!=lastgene:
		    if len(genes_detected)==0 or gene in genes_detected: ### restrict to detected genes
			first_last_exons[gene] = [(chr,start)]
		    if len(genes_detected)==0 or lastgene in genes_detected: ### restrict to detected genes
			try: first_last_exons[lastgene].append(lastend)
			except Exception:
			    pass ### occurs for the first gene	
		if ExonsPresent == False:
		    fe.write(gene+':'+exon+'='+chr+':'+start+'-'+end+'\n')
		lastgene = gene; lastend = end
	if len(genes_detected)==0 or lastgene in genes_detected:
	    first_last_exons[lastgene].append(lastend)
	
	### Add a fake junction for the whole gene
	for gene in first_last_exons:
	    (chr,start),end = first_last_exons[gene]
	    fe.write(gene+':E1.1-E100.1'+'='+chr+':'+start+'-'+end+'\n')
	fe.close()
    return feature_file	
Example #28
def importExonCoordinates(species):
    """ Import exon block, intron block and gene coordinates """

    firstRow = True
    exon_coordinate_path = 'AltDatabase/ensembl/' + species + '/' + species + '_Ensembl_exon.txt'
    fn = unique.filepath(exon_coordinate_path)
    gene_coordinates = {}
    gene_to_exons = {}
    exon_block_coordinates = {}
    gene_chr_strand = {}
    for line in open(fn, 'rU').xreadlines():
        data = cleanUpLine(line)
        t = string.split(data, '\t')
        if firstRow:
            firstRow = False
        else:
            gene, exonid, chr, strand, exon_region_starts, exon_region_ends, constitutive_call, ens_exon_ids, splice_events, splice_junctions = t
            exon_region_starts = map(int, string.split(exon_region_starts,
                                                       '|'))
            exon_region_ends = map(int, string.split(exon_region_ends, '|'))
            exon_block = gene + ':' + string.split(exonid, '.')[0]
            gene_chr_strand[gene] = chr, strand
            if gene in gene_to_exons:
                gene_to_exons[gene].append(exon_block)
            else:
                gene_to_exons[gene] = [exon_block]
            if gene in gene_coordinates:
                gene_coordinates[gene] += exon_region_starts + exon_region_ends
            else:
                gene_coordinates[gene] = exon_region_starts + exon_region_ends
            if exon_block in exon_block_coordinates:
                exon_block_coordinates[
                    exon_block] += exon_region_starts + exon_region_ends
            else:
                exon_block_coordinates[
                    exon_block] = exon_region_starts + exon_region_ends

    for gene in gene_coordinates:
        gene_coordinates[gene].sort()
        start = gene_coordinates[gene][0]
        end = gene_coordinates[gene][-1]
        chr, strand = gene_chr_strand[gene]
        gene_coordinates[gene] = chr, strand, start, end

    for exon in exon_block_coordinates:
        exon_block_coordinates[exon].sort()
        start = exon_block_coordinates[exon][0]
        end = exon_block_coordinates[exon][-1]
        chr, strand = gene_chr_strand[string.split(exon, ':')[0]]
        exon_block_coordinates[exon] = chr, strand, start, end

    print len(gene_coordinates), 'genes'
    print len(exon_block_coordinates), 'exons/introns'
    return gene_coordinates, exon_block_coordinates, gene_to_exons
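
The per-gene and per-exon-block spans computed above are simply the minimum and maximum of all collected boundary positions; sorting and taking the first and last element is equivalent to min() and max(). A one-line check with made-up coordinates:

coords = [9885753, 9886047, 9885900]
coords.sort()
assert (coords[0], coords[-1]) == (min(coords), max(coords))
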
Example #29
def update_plot_settings(bamdir, group_psi_values, sample_headers):
    ### This function writes out the sample order, colors, and sequence coverage for each BAM file for SashimiPlot
    bams = []
    sample_colors = []
    sample_coverage = []
    colors = [
        "red",
        "blue",
        "green",
        "grey",
        "orange",
        "purple",
        "yellow",
        "peach",
        "pink",
        "violet",
        "magenta",
        "navy",
    ]
    colors = colors * 300
    color_index = 0

    for group in group_psi_values:
        for index in group_psi_values[group]:
            g = sample_headers[index].replace(".bed", ".bam")
            bams.append('"' + g + '"')
            sample_colors.append('"' + colors[color_index] + '"')
            sample_coverage.append(str(int(sampleReadDepth[index])))
        color_index += 1  ### advance to the next color for the new group
    bams = string.join(bams, ",")
    sample_colors = string.join(sample_colors, ",")
    sample_coverage = string.join(sample_coverage, ",")

    export_pl = open(unique.filepath("Config/sashimi_plot_settings.txt"), "w")
    export_pl.write("[data]\n")
    export_pl.write("bam_prefix = " + bamdir + "\n")
    export_pl.write("bam_files =[" + bams + "]\n")

    export_pl.write("\n")
    export_pl.write("[plotting]")
    export_pl.write("\n")
    export_pl.write("fig_width = 7 \nfig_height = 7 \nintron_scale = 30 \nexon_scale = 4 \nlogged = False\n")
    export_pl.write("font_size = 6 \nbar_posteriors = False \nnyticks = 4 \nnxticks = 4 \n")
    export_pl.write("show_ylabel = False \nshow_xlabel = True \nshow_posteriors = False \nnumber_junctions = True \n")
    export_pl.write("resolution = .5 \nposterior_bins = 40 \ngene_posterior_ratio = 5 \n")
    export_pl.write("colors =[" + sample_colors + "]\n")
    export_pl.write("coverages =[" + sample_coverage + "]\n")

    export_pl.write('bar_color = "b" \nbf_thresholds = [0, 1, 2, 5, 10, 20]')
    export_pl.close()
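
The bracketed, quoted lists written into the settings file are plain comma-joined strings; the same formatting step can be expressed with str.join (the BAM names below are made up):

bam_files = ['ctrl_1.bam', 'ctrl_2.bam', 'treated_1.bam']
bams = ','.join('"%s"' % name for name in bam_files)
print('bam_files =[' + bams + ']')
# bam_files =["ctrl_1.bam","ctrl_2.bam","treated_1.bam"]
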
Example #30
def searchDirectory(directory, var, secondary=None):
    directory = unique.filepath(directory)

    files = unique.read_directory(directory)
    for file in files:
        if var in file:
            if secondary == None:
                return directory + '/' + file
            elif secondary in file:
                return directory + '/' + file

    ### if all else fails
    return directory + '/' + file
Example #31
def sample(fname):
    fname = unique.filepath(fname)
    head=0
    samplelis=[]
    for line in open(fname, 'rU').xreadlines():
        line = cleanUpLine(line)
        if head == 0:
            t = string.split(line, '\t')
            #print t
            for p in range(11, len(t)):
                samplelis.append(t[p])
            head = 1
        else:
            break
    return samplelis
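
sample() assumes the PSI file has eleven annotation columns before the first sample column, which is why it slices the header with range(11, len(t)). A minimal illustration with a fabricated header row:

header_fields = ['anno%d' % i for i in range(11)] + ['S1.bed', 'S2.bed']
samplelis = [header_fields[p] for p in range(11, len(header_fields))]
print(samplelis)  # ['S1.bed', 'S2.bed']
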
Example #32
def searchDirectory(directory,var,secondary=None):
    directory = unique.filepath(directory)

    files = unique.read_directory(directory)
    for file in files:
        if var in file:
            if secondary== None:
                return directory+'/'+file
            elif secondary in file:
                return directory+'/'+file
            
    ### if all else fails
    return directory+'/'+file 
Example #33
def sample(fname):
    fname = unique.filepath(fname)
    head = 0
    samplelis = []
    for line in open(fname, 'rU').xreadlines():
        line = cleanUpLine(line)
        if head == 0:
            t = string.split(line, '\t')
            #print t
            for p in range(11, len(t)):
                samplelis.append(t[p])
            head = 1
        else:
            break
    return samplelis
Example #34
def update_plot_settings(bamdir, group_psi_values, sample_headers):
    ### This function writes out the sample order, colors, and sequence coverage for each BAM file for SashimiPlot
    bams = []
    sample_colors = []
    sample_coverage = []
    colors = [
        'red', 'blue', 'green', 'grey', 'orange', 'purple', 'yellow', 'peach',
        'pink', 'violet', 'magenta', 'navy'
    ]
    colors = colors * 300
    color_index = 0

    for group in group_psi_values:
        for index in group_psi_values[group]:
            g = sample_headers[index].replace('.bed', '.bam')
            bams.append('"' + g + '"')
            sample_colors.append('"' + colors[color_index] + '"')
            sample_coverage.append(str(int(sampleReadDepth[index])))
        color_index += 1  ### advance to the next color for the new group
    bams = string.join(bams, ',')
    sample_colors = string.join(sample_colors, ',')
    sample_coverage = string.join(sample_coverage, ',')

    export_pl = open(unique.filepath('Config/sashimi_plot_settings.txt'), 'w')
    export_pl.write('[data]\n')
    export_pl.write('bam_prefix = ' + bamdir + '\n')
    export_pl.write('bam_files =[' + bams + ']\n')

    export_pl.write('\n')
    export_pl.write('[plotting]')
    export_pl.write('\n')
    export_pl.write(
        'fig_width = 7 \nfig_height = 7 \nintron_scale = 30 \nexon_scale = 4 \nlogged = False\n'
    )
    export_pl.write(
        'font_size = 6 \nbar_posteriors = False \nnyticks = 4 \nnxticks = 4 \n'
    )
    export_pl.write(
        'show_ylabel = False \nshow_xlabel = True \nshow_posteriors = False \nnumber_junctions = True \n'
    )
    export_pl.write(
        'resolution = .5 \nposterior_bins = 40 \ngene_posterior_ratio = 5 \n')
    export_pl.write('colors =[' + sample_colors + ']\n')
    export_pl.write('coverages =[' + sample_coverage + ']\n')

    export_pl.write('bar_color = "b" \nbf_thresholds = [0, 1, 2, 5, 10, 20]')
    export_pl.close()
Example #35
def genelist(fname):
    fname = unique.filepath(fname)
    lis = []
    for line in open(fname, 'rU').xreadlines():
        line = cleanUpLine(line)
        t = string.split(line, '\t')
        gene = string.split(t[2], ':')
        val = t[2] + ' ' + t[3]
        lis.append(val)

        if gene[0] in gene_sym:
            continue
        else:
            gene_sym[gene[0]] = t[0]

        #print t[0]
    return lis, gene_sym
Example #36
def remoteSashimiPlot(species, fl, bamdir, genelis):
    global inputpsi
    global outputdir
    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        root_dir = fl
        search_dir = root_dir + '/ExpressionInput'
        files = unique.read_directory(search_dir)
        for file in files:
            if 'counts.' in file and 'steady-state.txt' not in file:
                countinp = search_dir + '/' + file

    inputpsi = root_dir + '/AltResults/AlternativeOutput/' + species + '_RNASeq_top_alt_junctions-PSI.txt'
    #outputdir=findParentDir(inputpsi)+"sashimiplots"
    outputdir = root_dir + '/ExonPlots'
    outputdir = root_dir + '/SashimiPlots'
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass
    #print bamdir
    #print countinp
    #print inputpsi
    #print genelis
    Sashimiplottting(bamdir, countinp, inputpsi, genelis)

    gene_label, gene_sym = genelist(inputpsi)
    for filename in os.listdir(outputdir):
        if '.pdf' in filename:
            newname = string.split(filename, ':')
            if newname[0] in gene_sym:
                new_filename = str(filename)
                if ':' in filename:
                    new_filename = string.split(filename, ':')[1]
                elif '\\' in filename:
                    new_filename = string.split(filename, '\\')[1]
                elif '/' in filename:
                    new_filename = string.split(filename, '/')[1]
                nnname = gene_sym[newname[0]] + '-SashimiPlot_' + new_filename
                os.rename(os.path.join(outputdir, filename),
                          os.path.join(outputdir, nnname))
            else:
                continue
Example #37
def genelist(fname):
    fname = unique.filepath(fname)
    lis=[]
    for line in open(fname, 'rU').xreadlines():
        line = cleanUpLine(line)
        t = string.split(line, '\t')
        gene = string.split(t[2], ':')
        val = t[2] + ' ' + t[3]
        lis.append(val)

        if gene[0] in gene_sym:
            continue
        else:
            gene_sym[gene[0]] = t[0]

        #print t[0]
    return lis, gene_sym
Example #38
def remoteSashimiPlot(species,fl,bamdir,genelis):
    global inputpsi
    global outputdir
    try:
	countinp = fl.CountsFile()
	root_dir = fl.RootDir()
    except Exception:
	root_dir = fl
        search_dir = root_dir+'/ExpressionInput'
        files = unique.read_directory(search_dir)
        for file in files:
            if 'counts.' in file and 'steady-state.txt' not in file:
                    countinp = search_dir+'/'+file
    
    inputpsi = root_dir+'/AltResults/AlternativeOutput/'+species+'_RNASeq_top_alt_junctions-PSI.txt'
    #outputdir=findParentDir(inputpsi)+"sashimiplots"
    outputdir = root_dir+'/ExonPlots'
    outputdir = root_dir+'/SashimiPlots'
    try: os.mkdir(unique.filepath(outputdir))
    except Exception: pass
    #print bamdir
    #print countinp
    #print inputpsi
    #print genelis
    Sashimiplottting(bamdir,countinp,inputpsi,genelis)

    gene_label,gene_sym=genelist(inputpsi)
    for filename in os.listdir(outputdir):
	if '.pdf' in filename:
	    newname=string.split(filename,'__')
	    if newname[0] in gene_sym:
		new_filename = str(filename)
		if '__' in filename:
		    new_filename = string.split(filename,'__')[1]
		elif '\\' in filename:
		    new_filename = string.split(filename,'\\')[1]
		elif '/' in filename:
		    new_filename = string.split(filename,'/')[1]
	        nnname=gene_sym[newname[0]]+'-SashimiPlot_'+new_filename
		os.rename(os.path.join(outputdir, filename), os.path.join(outputdir,nnname))
	    else:
		continue
Example #39
def importAgilentExpressionValues(filename, array, channel_to_extract):
    """ Imports Agilent Feature Extraction files for one or more channels """
    print '.',
    red_expr_db = {}
    green_expr_db = {}
    parse = False
    fn = unique.filepath(filename)
    for line in open(fn, 'rU').xreadlines():
        data = UI.cleanUpLine(line)
        if parse == False:
            if 'ProbeName' in data:
                headers = string.split(data, '\t')
                pn = headers.index('ProbeName')
                try:
                    gc = headers.index('gProcessedSignal')
                except Exception:
                    pass
                try:
                    rc = headers.index('rProcessedSignal')
                except Exception:
                    pass
                parse = True
        else:
            t = string.split(data, '\t')
            probe_name = t[pn]
            try:
                green_channel = math.log(float(t[gc]) + 1, 2)  #min is 0
            except Exception:
                pass
            try:
                red_channel = math.log(float(t[rc]) + 1, 2)  #min is 0
            except Exception:
                pass
            if 'red' in channel_to_extract:
                red_expr_db[probe_name] = red_channel
            if 'green' in channel_to_extract:
                green_expr_db[probe_name] = green_channel

    if 'red' in channel_to_extract:
        red_channel_db[array] = red_expr_db
    if 'green' in channel_to_extract:
        green_channel_db[array] = green_expr_db
Example #40
def importSplicingEvents(folder):
    dataset_events = {}
    files = unique.read_directory(folder)
    for file in files:
        if 'PSI.' in file and '.txt' in file:
            events = []
            dataset = file[:-4]
            fn = unique.filepath(folder + '/' + file)
            firstRow = True
            for line in open(fn, 'rU').xreadlines():
                data = cleanUpLine(line)
                t = string.split(data, '\t')
                if firstRow:
                    index = 0
                    """ Standard Fields from MultiPath-PSI """
                    for i in t:
                        if 'Event-Direction' == i:
                            ed = index
                        if 'ClusterID' == i:
                            ci = index
                        if 'AltExons' == i:
                            ae = index
                        if 'EventAnnotation' == i:
                            ea = index
                        if 'Coordinates' == i:
                            co = index
                        index += 1
                    firstRow = False
                else:
                    id = t[0]
                    event_direction = t[ed]
                    clusterID = t[ci]
                    altExons = t[ae]
                    coordinates = t[co]
                    ei = EventInformation(id, event_direction, clusterID,
                                          altExons, coordinates)
                    events.append(ei)
            dataset_events[dataset] = events
    return dataset_events
Example #41
def download(url,dir,file_type):
    global suppress_printouts
    try: suppress_printouts = Suppress_Printouts
    except Exception: suppress_printouts = 'no'
    try: dp = download_protocol(url,dir,file_type); output_filepath, status  = dp.getStatus(); fp = output_filepath
    except Exception:
        try:
            dir = unique.filepath(dir) ### Can result in the wrong filepath exported for AltDatabase RNA-Seq zip files (don't include by default)
            dp = download_protocol(url,dir,file_type); output_filepath, status  = dp.getStatus(); fp = output_filepath
        except Exception:
            output_filepath='failed'; status = "Internet connection not established. Re-establish and try again."
            fp = filepath(dir+url.split('/')[-1]) ### Remove this empty object if saved
    if 'Internet' not in status:
        if '.zip' in fp or '.gz' in fp or '.tar' in fp:
            #print "\nRemoving zip file:",fp
            try: os.remove(fp); status = 'removed'
            except Exception: null=[] ### Not sure why this error occurs since the file is not open
            #print "\nRemoving zip file:",string.replace(fp,'.gz','')
            if '.tar' in fp:
                try: os.remove(string.replace(fp,'.gz',''))
                except Exception: null=[]
    return output_filepath, status
Example #42
def eCLIPimport(folder):
    eCLIP_dataset_peaks = {}
    files = unique.read_directory(folder)
    for file in files:
        if '.bed' in file:
            peaks = []
            dataset = file[:-4]
            fn = unique.filepath(folder + '/' + file)
            for line in open(fn, 'rU').xreadlines():
                data = cleanUpLine(line)
                t = string.split(data, '\t')
                chr = t[0]
                start = int(t[1])
                end = int(t[2])
                strand = t[5]
                annotation = t[6]
                gene = string.split(t[8], '.')[0]
                symbol = t[-2]
                pi = PeakInformation(chr, start, end, strand, annotation, gene,
                                     symbol)
                peaks.append(pi)
            eCLIP_dataset_peaks[dataset] = peaks
    return eCLIP_dataset_peaks
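
Judging from the indices used above, eCLIPimport assumes an annotated BED-like layout: chromosome, start, and end in the first three columns, strand in column 6, an annotation in column 7, a dotted gene identifier in column 9, and the gene symbol in the next-to-last column; the exact column meanings are an assumption here. A one-record illustration with fabricated values:

fields = ['chr1', '100', '200', 'peak_1', '0', '+',
          '5utr', '.', 'ENSG00000123456.7', 'GENE1', 'extra']
gene = fields[8].split('.')[0]
symbol = fields[-2]
print(gene + ' ' + symbol)  # ENSG00000123456 GENE1
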
Example #43
def importPSIJunctions(fname):
    All_PSI_Reciprocol_Junctions = []
    fname = unique.filepath(fname)
    header = True
    for line in open(fname, 'rU').xreadlines():
        line = line.rstrip(os.linesep)
        if header: header = False
        else:
            t = string.split(line, '\t')
            junction1 = t[2]
            junction2 = t[3]
            try:
                ### Re-order these to have the exclusion be listed first
                j1a, j1b = string.split(t[2], '-')
                j2a, j2b = string.split(t[3], '-')
                j1a = string.split(j1a, ':')[1]
                j2a = string.split(j2a, ':')[1]
                j1a = int(float(string.split(j1a, '.')[0][1:]))
                j1b = int(float(string.split(j1b, '.')[0][1:]))
                j2a = int(float(string.split(j2a, '.')[0][1:]))
                j2b = int(float(string.split(j2b, '.')[0][1:]))
                #print [j1a,j2a,j1b,j2b], t[2], t[3]
                event1 = string.replace(junction1, ":",
                                        "__")  ### first listed junction
                event2 = string.replace(junction2, ":",
                                        "__")  ### second listed junction
                if j1a > j2a or j1b < j2b:
                    event_pair = event1, event2
                else:
                    event_pair = event2, event1
            except Exception:
                #print traceback.format_exc();sys.exit()
                event_pair = event1, event2
            if '-' not in event1:
                event_pair = event2, event1
            All_PSI_Reciprocol_Junctions.append(event_pair)
    return All_PSI_Reciprocol_Junctions
Example #44
def update_plot_settings(bamdir,group_psi_values,sample_headers):
    ### This function writes out the sample order, colors, and sequence coverage for each BAM file for SashimiPlot
    bams=[]
    sample_colors=[]
    sample_coverage=[]
    colors = ['red','blue','green','grey','orange','purple','yellow','peach','pink','violet','magenta','navy']
    colors = colors*300
    color_index=0

    for group in group_psi_values:
        for index in group_psi_values[group]:
            g=sample_headers[index].replace('.bed','.bam')
            bams.append('"'+g+'"')
            sample_colors.append('"'+colors[color_index]+'"')
            sample_coverage.append(str(int(sampleReadDepth[index])))
        color_index+=1 ### advance to the next color for the new group
    bams = string.join(bams,',')
    sample_colors = string.join(sample_colors,',')
    sample_coverage = string.join(sample_coverage,',')
    
    export_pl=open(unique.filepath('Config/sashimi_plot_settings.txt'),'w')
    export_pl.write('[data]\n')
    export_pl.write('bam_prefix = '+bamdir+'\n')
    export_pl.write('bam_files =['+bams+']\n')

    export_pl.write('\n')
    export_pl.write('[plotting]')
    export_pl.write('\n') 
    export_pl.write('fig_width = 7 \nfig_height = 7 \nintron_scale = 30 \nexon_scale = 4 \nlogged = False\n')
    export_pl.write('font_size = 6 \nbar_posteriors = False \nnyticks = 4 \nnxticks = 4 \n')
    export_pl.write('show_ylabel = False \nshow_xlabel = True \nshow_posteriors = False \nnumber_junctions = True \n')
    export_pl.write('resolution = .5 \nposterior_bins = 40 \ngene_posterior_ratio = 5 \n')
    export_pl.write('colors =['+sample_colors+']\n')
    export_pl.write('coverages =['+sample_coverage+']\n')
    
    export_pl.write('bar_color = "b" \nbf_thresholds = [0, 1, 2, 5, 10, 20]')
    export_pl.close()
Example #45
def reimportFeatures(featureFile):
    gene_event_db={}
    featureFile = unique.filepath(featureFile)
    head=0
    for line in open(featureFile, 'rU').xreadlines():
        #for k in range(len(a['AltAnalyze_ID'])):
        if head == 0: head = 1
        else:
            line = line.rstrip('\n')
            event = string.split(line, '\t')[0]  #example event: ENSMUSG00000025915:E17.2-E17.5=chr1:9885753-9886047
            event = string.replace(event, ':', '__')
            event_split = string.split(event, '__')
            for i in range(len(event_split)):
                if "ENS" in event_split[i] or '00000' in event_split[i]:
                    if '-' in event_split[i]:
                        ji = string.split(event_split[i], '-')
                        gene = ji[1]
                    else:
                        gene = event_split[i]
                    if gene in gene_event_db:
                        gene_event_db[gene].append(event)
                    else:
                        gene_event_db[gene] = [event]
    return gene_event_db
Example #46
	max_len: defines the upper limit on the length of a command string. A
		command string will be passed to R via a temporary file if it is
		longer than this value.
	use_numpy: Used as a boolean value. A False value will disable numpy even
		if it has been imported.
	use_dict: a named list will be returned as a dict if use_dict is True,
		otherwise it will be a list of (name, value) tuples.
	host: The computer name (or IP) on which the R interpreter is
		installed. The value "localhost" means that R is located on the
		local computer. On POSIX systems (including the Cygwin
		environment on Windows), it is possible to use R on a remote
		computer if the command "ssh" works. To do that, the user needs to
		set this value, and perhaps the parameter "user".
	user: The user name on the remote computer. This value needs to be set
		only if the user name is different on the remote computer. In an
		interactive environment, the password can be entered by the user if
		prompted. If running in a program, the user needs to be able to
		log in without typing a password!
	ssh: The program to login to remote computer.
	'''
	if isinstance(Robj, basestring):
		Robj = R(RCMD=Robj, max_len=max_len, use_numpy=use_numpy, use_dict=use_dict, host=host, user=user, ssh=ssh)
	rlt = Robj.run(CMDS=CMDS)
	if len(rlt) == 1: rlt = rlt[0]
	return rlt
	
if __name__ == '__main__':
	import unique
	path = unique.filepath("AltDatabase/R/Contents/MacOS/R")
	r = R(RCMD='R',use_numpy=True)
Example #47
def remoteGene(gene,Species,root_dir,comparison_file):
    global Transcript_Annotations_File
    global ExonRegion_File
    global Selected_Gene
    global Prt_Trans_File
    global Prt_Regions_File
    global Prt_Boundaries_File
    global SplicingIndex_File
    global UniPrt_Regions_File
    global microRNA_File
    global domainAnnotation_db
    global platform
    global species

    Selected_Gene = str(gene)
    species = Species
    
    comparison_name = string.split(export.findFilename(comparison_file),'.')[0]
    ExonRegion_File = unique.filepath("AltDatabase/ensembl/"+species+"/"+species+"_Ensembl_exon.txt")
    Transcript_Annotations_File = unique.filepath("AltDatabase/ensembl/"+species+"/"+species+"_Ensembl_transcript-annotations.txt")
    Prt_Trans_File = searchDirectory("AltDatabase/ensembl/"+species+"/",'Ensembl_Protein')
    Prt_Regions_File = searchDirectory("AltDatabase/ensembl/"+species+"/",'ProteinFeatures')
    Prt_Boundaries_File = searchDirectory("AltDatabase/ensembl/"+species+"/",'ProteinCoordinates')
    UniPrt_Regions_File = searchDirectory("AltDatabase/uniprot/"+species+"/",'FeatureCoordinate')
    SplicingIndex_File = searchDirectory(root_dir+'/AltResults/ProcessedSpliceData/','splicing-index',secondary=comparison_name)
    platform = getPlatform(SplicingIndex_File)
    microRNA_File = searchDirectory("AltDatabase/"+species+"/"+platform,'microRNAs_multiple')
    #print(SplicingIndex_File)

    total_val = ProteinCentricIsoformView(Selected_Gene)
    junctions = total_val[0]
    p_boundaries = total_val[1]
    p_domains = total_val[2]
    transcript_db = total_val[3]
    exon_db = total_val[4]
    splice_db = total_val[5]
    microRNA_db = total_val[6]
    domainAnnotation_db = total_val[7]

    #for i in exon_db:
    #    print("THE", i, exon_db[i], "\n")

    #for i in microRNA_db:
    #        m_test = microRNA_db[i]
    #    print(len(m_test))
    #    for q in m_test:
    #        print("microRNA", q.ExonBlock(), q.Description(), q.BP(), "\n")

    #for i in exon_db["ENST00000349238"]:
    #    print(i[2].EnsemblRegion())
    
    domain_color_list = []
    for i in p_domains:
        ploy = p_domains[i]
        for a in ploy:
            domain_color_list.append(a[1])

    domain_color_list = list(set(domain_color_list))
    domain_color_key = {}
    c_color1 = [0.8, 0.6, 0.1]
    c_color2 = [0.1, 0.6, 0.8]
    c_color3 = [0.6, 0.1, 0.8]
    c_color4 = [0.95, 0.6, 0.3]
    c_color5 = [0.3, 0.6, 0.95]
    c_color6 = [0.6, 0.3, 0.95]
    color_cycle = [c_color1, c_color2, c_color3, c_color4, c_color5, c_color6]

    ### Cycle through the six preset colors when assigning one to each domain
    for index, item in enumerate(domain_color_list):
        domain_color_key[item] = color_cycle[index % len(color_cycle)]

    #for i in domain_color_key:
        #print(i, domain_color_key[i], "\n")
    
    Y = 100
    Transcript_to_Y = {}
    for transcript in transcript_db:
        Transcript_to_Y[transcript] = Y
        Y = Y + 300
    import traceback

    def onpick(event):
        #ind = event.ind
        print(event.artist.get_label())

    #for i in domainAnnotation_db: print(i,len(domainAnnotation_db));break
    
    fig = pylab.figure()
    
    ylim = Y + 200
    currentAxis = pylab.gca()
    #ax = pylab.axes()
    ax = fig.add_subplot(111)
    X_Pos_List = []
    CoordsBank = []
    
    for transcript in transcript_db:
        try:
            Junc_List = junctions[transcript]
            y_pos = Transcript_to_Y[transcript]
            Gene_List = exon_db[transcript]
            color_flag = 1
            for entry in Gene_List:
                G_start = entry[0][0]
                G_end = entry[0][1]
                Exon_Object = entry[2]
                try:
                    LabelClass = splice_db[Exon_Object.EnsemblRegion()]
                    ExonName = Exon_Object.EnsemblExon()
                    RegCall = LabelClass.RegCall()
                    SplicingIndex = LabelClass.SplicingIndex()
                    PVal = LabelClass.PVal()
                    Midas = LabelClass.Midas()
                    Label = "\n" + "Exon: " + str(ExonName) + "\n" + "RegCall: "  + str(RegCall) + "\n" + "Splicing Index: " + str(SplicingIndex) + "\n" + "P-Value: " + str(PVal) + "\n" + "Midas Value: " + str(Midas) + "\n"
                    Label = string.replace(Label,"\n"," ")
                    if(RegCall == "UC"):
                        color_choice = "Grey"
                    else:
                        S_Int = float(SplicingIndex)
                        if(S_Int > 0):
                            #color_choice = (0.7, 0.7, 0.99)
                            color_choice = 'blue'
                        if(S_Int < 0):
                            #color_choice = (0.8, 0.4, 0.4)
                            color_choice = 'red'
                                            
                except:
                    #print(traceback.format_exc());sys.exit()
                    Label = ""
                    color_choice = "Grey"
                #print("Start", G_start, "end", G_end, "Region", entry[2].EnsemblRegion())
                if((color_flag % 2) == 0):
                    currentAxis.add_patch(Rectangle((G_start, y_pos), (G_end - G_start), 50, color = color_choice, label = (entry[2].EnsemblRegion() + Label), picker = True))
                    y_end = y_pos + 50
                    try: CoordsBank.append((G_start, G_end, y_pos, y_end, 'Exon: '+entry[2].EnsemblRegion()+' '+ 'SI: '+str(SplicingIndex)[:4]+' Pval: '+str(Midas)[:4]))
                    except Exception:
                        CoordsBank.append((G_start, G_end, y_pos, y_end, 'Exon: '+entry[2].EnsemblRegion()))
                    #print(entry[2].EnsemblRegion(),y_pos,y_end)
                if((color_flag % 2) != 0):                   
                    currentAxis.add_patch(Rectangle((G_start, y_pos), (G_end - G_start), 50, color = color_choice, label = (entry[2].EnsemblRegion() + Label), picker = True))
                    y_end = y_pos + 50
                    try: CoordsBank.append((G_start, G_end, y_pos, y_end, 'Exon: '+entry[2].EnsemblRegion()+' '+ 'SI: '+str(SplicingIndex)[:4]+' p-value: '+str(Midas)[:4]))
                    except Exception:
                        CoordsBank.append((G_start, G_end, y_pos, y_end, 'Exon: '+entry[2].EnsemblRegion()))
                    #print(entry[2].EnsemblRegion(),y_pos,y_end)
                color_flag = color_flag + 1
                if(entry[2].EnsemblRegion() in microRNA_db):
                    microRNA_object = microRNA_db[entry[2].EnsemblRegion()]
                    mr_label = "MICRORNA MATCHES" + "\n"
                    for class_object in microRNA_object:
                        mr_exonname = class_object.ExonBlock()
                        mr_desc = class_object.Description() + " " + class_object.Algorithms()
                        #print(mr_desc)
                        mr_label = mr_label + mr_desc + "\n"
                    
                    currentAxis.add_patch(Rectangle((G_start, (y_pos - 75)), (G_end - G_start), 40, color = "Green", label = (mr_label), picker = True))
                    y_start = y_pos - 75
                    y_end = y_pos - 35
                    CoordsBank.append((G_start, G_end, y_start, y_end, mr_desc))
                
            for entry in Junc_List:
                junctionID = entry[-1]
                try:
                    LabelClass = splice_db[entry[2]]
                    RegCall = LabelClass.RegCall()
                    SplicingIndex = LabelClass.SplicingIndex()
                    PVal = LabelClass.PVal()
                    Midas = LabelClass.Midas()
                    Label = "\n" + "RegCall: " + str(RegCall) + "\n" + "Splicing Index: " + str(SplicingIndex) + "\n" + "P-Value: " + str(PVal) + "\n" + "Midas Value: " + str(Midas) + "\n"
                    if(float(SplicingIndex) > 0):
                        color_junc = "blue"
                    if(float(SplicingIndex) < 0):
                        color_junc = "red"
                    if(RegCall == "UC"):
                        color_junc = "grey"
                except:
                    Label = ""
                    color_junc = "grey"
                currentAxis.add_patch(Rectangle((entry[0], y_pos), (entry[1] - entry[0]), 50, color = "White", label = (str(entry[2]) + Label), picker = True))
                ax.arrow(entry[0], (y_pos+50), 8, 40, label = (str(entry[2]) + Label), color = color_junc, picker = True)
                ax.arrow((entry[0] + 8), (y_pos+90), 11, -40, label = (str(entry[2]) + Label), color = color_junc, picker = True)
                y_start = y_pos
                y_end = y_pos + 30
                #print(junctionID,y_start,y_end)
                CoordsBank.append((G_start, G_end, y_start, y_end, junctionID))

            try:
                P_Bound_List = p_boundaries[transcript]
                E_Start = P_Bound_List[-2]
                E_End = P_Bound_List[-1]
                P_Start = P_Bound_List[1]
                P_End = P_Bound_List[2]
                #print("Boundaries: ", P_Start, P_End)
                X_Pos_List.append(int(E_End))
                #currentAxis.add_patch(Rectangle((E_Start, y_pos), E_End, 50, color = "Blue"))
                try:
                    currentAxis.add_patch(Rectangle((P_Start, (y_pos + 120)), (P_End - P_Start), 10))
                except:
                    pass
                p_label_list = ["DEF"]
                #CoordsBank.append((P_Start, P_End, y_pos, P_End - P_Start, transcript)) ### Added by NS - needs work
                try: P_Domain_List = p_domains[transcript]
                except Exception: P_Domain_List=[]
                for entry in P_Domain_List:
                    #print("Domain", entry)
                    color_domain_choice = domain_color_key[entry[1]]
                    domain_annotation = domainAnnotation_db[entry[1]]
                    #domain_annotation = string.replace(domain_annotation,'REGION-','')
                    p_label = (str(entry[0]) +  " " + str(domain_annotation))
                    #print(entry[0], entry[2], entry[3], P_Start, P_End, domain_annotation, )
                    Repeat_Flag = 0
                    for i in p_label_list:
                        if(p_label == i):
                            Repeat_Flag = 1
                    if(Repeat_Flag == 1):
                        continue
                    p_label_list.append(p_label)               
                    currentAxis.add_patch(Rectangle((entry[2], y_pos + 100), (entry[3] - entry[2]), 50, color = color_domain_choice, label= p_label, picker = True))
                    y_start = y_pos + 100
                    y_end = y_pos + 150
                    CoordsBank.append((entry[2], entry[3], y_start, y_end, p_label))
            except Exception:
                pass
                #print(traceback.format_exc())
        except:
            #print(traceback.format_exc())
            pass
    pylab.ylim([0.0, ylim])
    try:
        max_x = max(X_Pos_List)
    except:
        max_x = 5000
    try:
        pylab.xlim([0.0, max_x])
    except:
        pylab.xlim([0.0, 3000])
    fig.canvas.mpl_connect('pick_event', onpick)
    def format_coord(x, y):
        for m in CoordsBank:
            if(x >= m[0] and x <= m[1] and y >= m[2] and y <= m[3]):
                string_display = m[4]
                return string_display
        string_display = "  "
        return string_display

    ax.format_coord = format_coord
    #datacursor(hover=True, formatter='{label}'.format, bbox=dict(fc='yellow', alpha=1), arrowprops=None)
    pylab.show()
Exemplo n.º 48
0
def remoteSashimiPlot(Species, fl, bamdir, eventsToVisualizeFilename, events=None, show=False):
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    species = Species

    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        root_dir = fl
        search_dir = root_dir + "/ExpressionInput"
        files = unique.read_directory(search_dir)
        for file in files:
            if "counts." in file and "steady-state.txt" not in file:
                countinp = search_dir + "/" + file

    PSIFilename = root_dir + "/AltResults/AlternativeOutput/" + species + "_RNASeq_top_alt_junctions-PSI.txt"

    import ExpressionBuilder

    dir_list = unique.read_directory(root_dir + "/ExpressionInput")
    for file in dir_list:
        if "exp." in file and "steady-state" not in file:
            exp_file = root_dir + "/ExpressionInput/" + file
        elif "exp." in file and "steady-state" in file:
            steady_state_exp_file = root_dir + "/ExpressionInput/" + file
    global sample_group_db
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    # outputdir=findParentDir(PSIFilename)+"sashimiplots"
    outputdir = root_dir + "/ExonPlots"
    outputdir = root_dir + "/SashimiPlots"
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass

    if show:
        s = open(outputdir + "/show.txt", "w")
        s.write("TRUE")
        s.close()
    else:
        s = open(outputdir + "/show.txt", "w")
        s.write("FALSE")
        s.close()

    geneSymbol_db = Sashimiplottting(bamdir, countinp, PSIFilename, eventsToVisualizeFilename, events=events)
    for filename in os.listdir(outputdir):
        if ".pdf" in filename or ".png" in filename:
            fn = string.replace(filename, ".pdf", "")
            fn = string.replace(fn, ".png", "")
            newname = string.split(fn, "__")
            if newname[0] in geneSymbol_db:
                new_filename = str(filename)
                if "__" in filename:
                    new_filename = string.split(filename, "__")[1]
                elif "\\" in filename:
                    new_filename = string.split(filename, "\\")[1]
                elif "/" in filename:
                    new_filename = string.split(filename, "/")[1]
                nnname = geneSymbol_db[newname[0]][0] + "-SashimiPlot_" + new_filename
                try:
                    os.rename(os.path.join(outputdir, filename), os.path.join(outputdir, nnname))
                except Exception:
                    if "already exists" in traceback.format_exc():
                        ### File already exists, delete the new one
                        try:
                            os.remove(os.path.join(outputdir, nnname))
                        except Exception:
                            pass
                        ### Now write the new one
                        try:
                            os.rename(os.path.join(outputdir, filename), os.path.join(outputdir, nnname))
                        except Exception:
                            pass
                    pass
            else:
                continue
    print ""
Exemplo n.º 49
0
def remoteGene(gene):
    global Transcript_Annotations_File
    global ExonRegion_File
    global Selected_Gene
    global Prt_Trans_File
    global Prt_Regions_File
    global Prt_Boundaries_File
    global Etc_File
    import unique
    Selected_Gene = gene
    ExonRegion_File = unique.filepath("ExonViewFiles/Hs_Ensembl_exon.txt")
    Transcript_Annotations_File = unique.filepath("ExonViewFiles/Hs_Ensembl_transcript-annotations.txt")
    Prt_Trans_File = unique.filepath("ExonViewFiles/Hs_Ensembl_Protein__65_37.txt")
    Prt_Regions_File = unique.filepath("ExonViewFiles/Hs_ProteinFeatures_build_65_37.txt")
    Prt_Boundaries_File = unique.filepath("ExonViewFiles/Hs_ProteinCoordinates_build_65_37.tab")
    Etc_File = unique.filepath("ExonViewFiles/Hs_RNASeq_K562_SRSF2_P95mut_vs_K562_SRSF2_WT.ExpCutoff-5.0_average-splicing-index-ProcessedSpliceData.txt")
    #"ENSG00000005801"
    #"ENSG00000110514"
    total_val = ProteinCentricIsoformView(Selected_Gene)
    junctions = total_val[0]
    p_boundaries = total_val[1]
    p_domains = total_val[2]
    transcript_db = total_val[3]
    exon_db = total_val[4]
    splice_db = total_val[5]

    #for i in exon_db["ENST00000349238"]:
    #    print(i[2].EnsemblRegion())
    
    domain_color_list = []
    for i in p_domains:
        ploy = p_domains[i]
        for a in ploy:
            domain_color_list.append(a[1])

    domain_color_list = list(set(domain_color_list))
    domain_color_key = {}
    c_color1 = [0.8, 0.6, 0.1]
    c_color2 = [0.1, 0.6, 0.8]
    c_color3 = [0.6, 0.1, 0.8]
    c_color4 = [0.95, 0.6, 0.3]
    c_color5 = [0.3, 0.6, 0.95]
    c_color6 = [0.6, 0.3, 0.95]
    color_cycle = [c_color1, c_color2, c_color3, c_color4, c_color5, c_color6]

    ### Cycle through the six preset colors when assigning one to each domain
    for index, item in enumerate(domain_color_list):
        domain_color_key[item] = color_cycle[index % len(color_cycle)]

    for i in domain_color_key:
        print(i, domain_color_key[i], "\n")
    
    Y = 50
    Transcript_to_Y = {}
    for transcript in transcript_db:
        Transcript_to_Y[transcript] = Y
        Y = Y + 200
    import traceback

    ylim = Y + 200
    currentAxis = plt.gca()
    ax = plt.axes()
    X_Pos_List = []
    for transcript in transcript_db:
        try:
            Junc_List = junctions[transcript]
            y_pos = Transcript_to_Y[transcript]
            Gene_List = exon_db[transcript]
            color_flag = 1
            for entry in Gene_List:
                G_start = entry[0][0]
                G_end = entry[0][1]
                Exon_Object = entry[2]
                try:
                    LabelClass = splice_db[Exon_Object.EnsemblRegion()]
                    ExonName = Exon_Object.EnsemblExon()
                    RegCall = LabelClass.RegCall()
                    SplicingIndex = LabelClass.SplicingIndex()
                    PVal = LabelClass.PVal()
                    Midas = LabelClass.Midas()
                    Label = "\n" + "Exon: " + str(ExonName) + "\n" + "RegCall: "  + str(RegCall) + "\n" + "Splicing Index: " + str(SplicingIndex) + "\n" + "P-Value: " + str(PVal) + "\n" + "Midas Value: " + str(Midas)
                    if(RegCall == "UC"):
                        color_choice = "Grey"
                    else:
                        S_Int = float(SplicingIndex)
                        if(S_Int > 0):
                            color_choice = (0.7, 0.7, 0.99)
                        if(S_Int < 0):
                            color_choice = (0.8, 0.4, 0.4)
                                            
                except:
                    #print(traceback.format_exc());sys.exit()
                    Label = ""
                    color_choice = "Grey"
                #print("Start", G_start, "end", G_end, "Region", entry[2].EnsemblRegion())
                if((color_flag % 2) == 0):
                    currentAxis.add_patch(Rectangle((G_start, y_pos), (G_end - G_start), 50, color = color_choice, label = (entry[2].EnsemblRegion() + Label)))
                if((color_flag % 2) != 0):                   
                    currentAxis.add_patch(Rectangle((G_start, y_pos), (G_end - G_start), 50, color = color_choice, label = (entry[2].EnsemblRegion() + Label)))
                color_flag = color_flag + 1
                
                

            for entry in Junc_List:
                try:
                    LabelClass = splice_db[entry[2]]
                    RegCall = LabelClass.RegCall()
                    SplicingIndex = LabelClass.SplicingIndex()
                    PVal = LabelClass.PVal()
                    Midas = LabelClass.Midas()
                    Label = "\n" + "RegCall: " + str(RegCall) + "\n" + "Splicing Index: " + str(SplicingIndex) + "\n" + "P-Value: " + str(PVal) + "\n" + "Midas Value: " + str(Midas)
                    if(float(SplicingIndex) > 0):
                        color_junc = "blue"
                    if(float(SplicingIndex) < 0):
                        color_junc = "red"
                    if(RegCall == "UC"):
                        color_junc = "grey"
                except:
                    Label = ""
                    color_junc = "grey"
                currentAxis.add_patch(Rectangle((entry[0], y_pos), (entry[1] - entry[0]), 50, color = "White", label = (str(entry[2]) + Label)))
                ax.arrow(entry[0], (y_pos+50), 8, 40, label = (str(entry[2]) + Label), color = color_junc)
                ax.arrow((entry[0] + 8), (y_pos+90), 11, -40, label = (str(entry[2]) + Label), color = color_junc)


            P_Bound_List = p_boundaries[transcript]
            P_Domain_List = p_domains[transcript]
            E_Start = P_Bound_List[-2]
            E_End = P_Bound_List[-1]
            P_Start = P_Bound_List[1]
            P_End = P_Bound_List[2]
            #print("Boundaries: ", P_Start, P_End)
            X_Pos_List.append(int(E_End))
            #currentAxis.add_patch(Rectangle((E_Start, y_pos), E_End, 50, color = "Blue"))
            try:
                currentAxis.add_patch(Rectangle((P_Start, (y_pos + 120)), (P_End - P_Start), 10, label = ("Protein: " + str(P_Bound_List[0]))))
            except:
                pass
            for entry in P_Domain_List:
                #print("Domain", entry)
                color_domain_choice = domain_color_key[entry[1]]
                currentAxis.add_patch(Rectangle((entry[2], y_pos + 100), (entry[3] - entry[2]), 50, color = color_domain_choice, label= ("Protein: " + str(entry[0]) + "\n" + "Domain: " + str(entry[1]))))
        except:
            continue
    plt.ylim([0.0, ylim])
    try:
        max_x = max(X_Pos_List)
    except:
        max_x = 5000
    try:
        plt.xlim([0.0, max_x])
    except:
        plt.xlim([0.0, 3000])
    datacursor(hover=True, formatter='{label}'.format, bbox=dict(fc='yellow', alpha=1), arrowprops=None)
    plt.show()
Exemplo n.º 50
0
def sashmi_plot_list(bamdir,fname,gene_label,lines,samp,gene_sym):
    splicing_events=[]
    type = None
    firstLine = True
    for line in open(fname,'rU').xreadlines():
	line = cleanUpLine(line)
	t = string.split(line,'\t')
	if firstLine:
	    if 'junctionID-1' in t:
		j1i = t.index('junctionID-1')
		j2i = t.index('junctionID-2')
		type='ASPIRE'
	    if 'ANOVA' in t:
		type='PSI'
	    elif 'independent confirmation' in t:
		type='confirmed'
	    elif 'ANOVA' in fname:
		type = 'ANOVA'
	    firstLine=False
	if ' ' in t[0] and ':' in t[0]:
	    splicing_events.append(t[0])
	elif type=='ASPIRE':
	    splicing_events.append(t[j1i] +' '+ t[j2i])
	elif type=='ANOVA':
	    try:
		a,b = string.split(t[0],'|')
		a = string.split(a,':')
		a = string.join(a[1:],':')
		splicing_events.append(a +' '+ b)
	    except Exception: pass
	elif type=='PSI':
	    try:
		j1,j2 = string.split(t[0],'|')
		a,b,c = string.split(j1,':')
		j1 = b+':'+c
		splicing_events.append(j1 +' '+ j2)
	    except Exception:
		#print traceback.format_exc();sys.exit()
		pass
	elif type=='confirmed':
	    try:
		event_pair1 = string.split(t[1],'|')[0]
		a,b,c,d = string.split(event_pair1,'-')
		splicing_events.append(a+'-'+b +' '+ c+'-'+d)
	    except Exception: pass

    if len(splicing_events)==0:
	forceNoCompatibleEventsInFile ### undefined name; intentionally raises a NameError when no compatible events are found
    
    print 'Exporting plots',
    for li in splicing_events:
	if ":U" in li or "-U" in li:
	    
	    continue
	else:
	
	 li=cleanUpLine(li)
	 #print li
	 
	 #dem[0]=['ENSG00000132424:I10.1 ENSG00000132424:E10.1-E11.1','ENSG00000146147:E10.3-E11.1 ENSG00000146147:E9.3-E15.1']
	 de=string.split(li,'\t')
	 dem[0]=de
	 #print dem[0]
	 for key in dem:
	  for i in range(len(dem[key])):
	    list1=[]
	    list2=[]
	    try:
		k=gene_label.index(dem[key][i])
		flag=1
		lt=cleanUpLine(lines[k])
		t=string.split(lt,'\t')
		#print t
		t=t[11:]
		#print t
		#list3=[]
		#ind=[]
		for x in range(len(t)):
		    #print x,t[x]
		    if(t[x]!=''):
			if float(t[x]) < 0.8:
			    list1.append(x)
			    #print x
			    #print 'list1:'+str(x)
			else:
			    list2.append(x)
			    #print x
			   # print str(x)
		     
		    else:
			continue
	    
		if len(list1)>5:
		    list1=list1[1:5]
		if len(list2)>5:
		    list2=list2[1:5]
		#print len(list1),len(list2)
	    except Exception:
		
		for ij in range(len(samp)):
		    list1.append(ij)
	    update_plot_settings(bamdir,list1,list2,samp)
	    
	    a=string.split(dem[key][i]," ")
	    if '-' in a[1]:
		    
		    ch1=a[1]
		    f=string.split(a[0],':')
	    else:
		    ch1=a[0]
		    f=string.split(a[1],':')
	    event=findParentDir(inputpsi)
	    event=event+"trial_index/"
	    setting =unique.filepath("Config/sashimi_plot_settings.txt")
	    try: ch1=string.replace(ch1,':','__')
	    except Exception: pass
	    name=ch1
	    #outputdir=findParentDir(inputpsi)+"sashimiplots"
	    try: os.makedirs(outputdir)
	    except Exception: pass
	    
	#print '********',[ch1],[event],outputdir

	try:
	    ssp.plot_event(ch1,event,setting,outputdir)
	except Exception:
	    #print '^^^^^^^^^^^^',[ch1],[event],outputdir;sys.exit()
	    #print traceback.format_exc()
	    #print "error2"
	    #sys.exit()
	    continue
    #outputdir=findParentDir(inputpsi)+"sashimiplots" 
    for filename in os.listdir(outputdir):
	newname=string.split(filename,'/')
	#print newname[0]
	if newname[0] in gene_sym:
	    new_path = gene_sym[newname[0]]+'-'+filename
	    #new_path = string.replace()
	    os.rename(os.path.join(outputdir,filename),os.path.join(outputdir,new_path))
	else:
	    continue
Exemplo n.º 51
0
def filepath(filename):
    fn = unique.filepath(filename)
    return fn
Exemplo n.º 52
0
def formatAndSubmitSplicingEventsToSashimiPlot(
    filename, bamdir, splicing_events, sample_group_db, groups, expandedSearch
):
    ### Begin exporting parameters and events for SashimiPlot visualization
    firstLine = True
    setting = unique.filepath("Config/sashimi_plot_settings.txt")
    psi_parent_dir = findParentDir(filename)
    if "PSI" not in filename:
        index_dir = string.split(psi_parent_dir, "ExpressionInput")[0] + "AltResults/AlternativeOutput/sashimi_index/"
    else:
        index_dir = psi_parent_dir + "sashimi_index/"

    spliced_junctions = []  ### Alternatively, compare to just one of the junctions
    for splicing_event in splicing_events:
        try:
            j1, j2 = string.split(splicing_event, " ")
            spliced_junctions.append(j1)
            spliced_junctions.append(j2)
        except Exception:
            spliced_junctions.append(splicing_event)  ### single gene ID or junction

    if "PSI" not in filename:
        splicing_events_db = {}
        for event in splicing_events:
            event = string.replace(event, ":", "__")
            if " " in event:
                event = string.split(event, " ")[-1]
            gene = string.split(event, "__")[0]
            try:
                splicing_events_db[gene].append(event)
            except Exception:
                splicing_events_db[gene] = [event]
        splicing_events = splicing_events_db

    import collections

    analyzed_junctions = []
    processed_events = []
    for line in open(filename, "rU").xreadlines():
        line = cleanUpLine(line)
        t = string.split(line, "\t")
        if firstLine:
            if "PSI" in filename:
                sampleIndexBegin = 11
                sample_headers = t[sampleIndexBegin:]
            else:
                sampleIndexBegin = 1
                sample_headers = t[sampleIndexBegin:]
                if ".bed" not in sample_headers[0]:  ### Add .bed if removed manually
                    sample_headers = map(lambda s: s + ".bed", sample_headers)
            index = 0
            sample_group_index = {}
            for s in sample_headers:
                group = sample_group_db[s]
                sample_group_index[index] = group
                try:
                    sampleReadDepth[index] = count_sum_array_db[s]
                except Exception:
                    sampleReadDepth[index] = count_sum_array_db[s]
                index += 1
            firstLine = False
        else:
            if "PSI" in filename:
                splicing_event = val = t[2] + " " + t[3]
                j1 = t[2]
                j2 = t[3]
                if t[2] in analyzed_junctions and t[3] in analyzed_junctions:
                    continue
            else:
                splicing_event = t[0]  ### The gene ID
                j1 = t[0]
                j2 = t[0]
            if ":U" in splicing_event or "-U" in splicing_event:
                continue
            else:
                ### First check to see if the full splicing event matches the entry
                ### If not (and not a PSI regulation hits list), look for an individual junction match
                if splicing_event in splicing_events or (
                    expandedSearch and (j1 in spliced_junctions or j2 in spliced_junctions)
                ):
                    if splicing_event in processed_events:
                        continue
                    if j2 in processed_events:
                        continue
                    if j1 in processed_events:
                        continue
                    processed_events.append(splicing_event)
                    processed_events.append(j1)
                    processed_events.append(j2)
                    # print processed_events, splicing_event
                    if "PSI" in filename:
                        geneID = string.split(t[2], ":")[0]
                        symbol = t[0]
                        analyzed_junctions.append(t[2])
                        analyzed_junctions.append(t[3])
                    else:  ### For exp.dataset-steady-state.txt files
                        geneID = splicing_event
                        events = splicing_events[geneID]
                    index = 0

                    import collections

                    initial_group_psi_values = {}
                    try:
                        group_psi_values = collections.OrderedDict()
                    except Exception:
                        try:
                            import ordereddict

                            group_psi_values = ordereddict.OrderedDict()
                        except Exception:
                            group_psi_values = {}
                    for i in t[sampleIndexBegin:]:  ### Value PSI range in the input file
                        try:
                            group = sample_group_index[index]
                        except Exception:
                            group = None
                        try:
                            try:
                                initial_group_psi_values[group].append([float(i), index])
                            except Exception:
                                initial_group_psi_values[group] = [[float(i), index]]
                        except Exception:
                            # print traceback.format_exc();sys.exit()
                            pass  ### Ignore the NULL values
                        index += 1
                    ### limit the number of events reported and sort based on the PSI values in each group
                    if "None" in groups and len(groups) == 1:
                        initial_group_psi_values["None"].sort()
                        group_size = len(initial_group_psi_values["None"]) / 2
                        filtered_group_index1 = map(lambda x: x[1], initial_group_psi_values["None"][:group_size])
                        filtered_group_index2 = map(lambda x: x[1], initial_group_psi_values["None"][group_size:])
                        group_psi_values["low"] = filtered_group_index1
                        group_psi_values["high"] = filtered_group_index2
                    else:
                        gn = 0
                        for group in groups:
                            gn += 1
                            # if gn>4: break
                            if group in initial_group_psi_values:
                                initial_group_psi_values[group].sort()
                                if len(groups) > 7:
                                    filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:1])
                                elif len(groups) > 5:
                                    filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:2])
                                elif len(groups) > 3:
                                    filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:4])
                                else:
                                    filtered_group_indexes = map(lambda x: x[1], initial_group_psi_values[group][:5])
                                group_psi_values[group] = filtered_group_indexes
                    try:
                        update_plot_settings(bamdir, group_psi_values, sample_headers)
                    except Exception:
                        print "Cannot update the settings file. Likely permissions issue."

                    try:
                        reordered = reorderEvents([t[2] + " " + t[3]])
                        reordered = string.split(reordered[0], " ")
                    except Exception:
                        reordered = [t[2] + " " + t[3]]
                        reordered = string.split(reordered[0], " ")
                        # print reordered
                    if "PSI" in filename:
                        try:
                            formatted_splice_event = string.replace(reordered[1], ":", "__")
                        except Exception:
                            pass
                        ### Submit the query
                        try:
                            ssp.plot_event(formatted_splice_event, index_dir, setting, outputdir)
                            success = True
                        except Exception:
                            success = False
                            # print traceback.format_exc()

                    else:
                        for event in events:
                            try:
                                ssp.plot_event(event, index_dir, setting, outputdir)
                                # print 'success' #formatted_splice_event='ENSMUSG00000000355__E4.1-E5.1'
                            except Exception:  ### If it fails, output the gene-level plot
                                try:
                                    ssp.plot_event(geneID, index_dir, setting, outputdir)
                                    success = True
                                except Exception:
                                    success = False
                                    # print traceback.format_exc()
                    """
                    ### Second attempt
                    if 'PSI' in filename and success==False: ### Only relevant when parsing the junction pairs but not genes
                        try: formatted_splice_event=string.replace(reordered[0],':','__')
                        except Exception: pass
                        try: ssp.plot_event(formatted_splice_event,index_dir,setting,outputdir); # print 'success'
                        except Exception: pass
		    """
    return processed_events
Exemplo n.º 53
0
def ProteinCentricIsoformView(Selected_Gene):
    Transcript_List = []
    Transcript_db = {}
    Exon_db = {}
    for line in open(Transcript_Annotations_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        if(line[0] == Selected_Gene):
            transcriptID = line[-1]
            exonID = line[5]
            start = line[3]
            stop = line[4]
            strand = line[2]
            chr = line[1]
            if 'chr' not in chr:
                chr = 'chr'+chr
            exon_data = EnsemblRegionClass(start,stop,exonID,None,strand)
            exon_data.setChr(chr)
            Transcript_List.append((transcriptID, exonID))
            try:
                Transcript_db[transcriptID].append(exon_data)
            except Exception:
                Transcript_db[transcriptID]=[exon_data]
            try:
                Exon_db[exonID].append(transcriptID)
            except Exception:
                Exon_db[exonID]=[transcriptID]
    
    Transcript_Protein_db = {}
    Protein_Transcript_db = {}
    Protein_List = []
    count = 0
    for line in open(Prt_Trans_File, "rU").xreadlines():
        if(count == 0):
            count = 1
            continue
        line = line.rstrip()
        line = line.split("\t")
        if(len(line) != 3):
            continue
        geneID = line[0]
        transcriptID = line[1]
        proteinID = line[2]
        if Selected_Gene == geneID:
            Transcript_Protein_db[transcriptID] = proteinID
            Protein_Transcript_db[proteinID] = transcriptID
            Protein_List.append(proteinID)

    #MicroRNA File
    microRNA_db = {}
    for line in open(microRNA_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        try:
            gene_and_exon_id = line[0].split(":")
            current_gene_id = gene_and_exon_id[0]
            current_exon_id = gene_and_exon_id[1]   
        except Exception:
            continue
        #print([current_gene_id,current_exon_id,Selected_Gene]);break
        current_description = line[1]
        current_base_pairs = line[2]
        algorithms = line[3]
        if(current_gene_id == Selected_Gene):
            m = MicroRNAClass(current_exon_id, current_description, current_base_pairs, algorithms)           
            try:
                if(len(microRNA_db[current_exon_id]) > 6):
                    continue
                microRNA_db[current_exon_id].append(m)
                #print("ADDED!")
                
            except:
                microRNA_db[current_exon_id] = [m]
            
    Transcript_ExonRegion_db={}
    geneExonRegion_db={}
    exon_coord_db={}
    exonRegion_db={}
    AllBlocks = [("E", []), ("I", [])]
    # Store the exon region positions and later link them to the Ensembl exons
    for line in open(ExonRegion_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        geneID = line[0]
        exon_region = line[1]
        chr = line[2]
        exonID = line[1]
        strand = line[3]
        start = line[4]
        stop = line[5]
        er = EnsemblRegionClass(start,stop,exonID,exon_region,strand)
        if(geneID == Selected_Gene):
                Block_Num = exon_region[1:]
                I_E_id = exon_region[0]
                if(I_E_id == "E"):
                    AllBlocks[0][1].append(Block_Num)
                if(I_E_id == "I"):
                    AllBlocks[1][1].append(Block_Num)
                    continue
                exon_added = False
                #Exon_List = line[7].split("|")
                exon_coord_db[chr,int(start),'start'] = exon_region
                exon_coord_db[chr,int(stop),'stop'] = exon_region
                exonRegion_db[Selected_Gene,exon_region] = er
                #print chr,start,'start'
    probeset_to_ExonID={}
    if platform != 'RNASeq':
        for line in open(unique.filepath('AltDatabase/'+species+'/'+string.lower(platform)+'/'+species+'_Ensembl_probesets.txt'), "rU").xreadlines():
            line = line.rstrip()
            line = line.split("\t")
            gene = line[2]
            if gene == Selected_Gene:
                probeset = line[0]
                exon_region = line[12]
                if '.' not in exon_region:
                    exon_region = string.replace(exon_region,'-','.')
                probeset_to_ExonID[probeset] = exon_region

    ETC_List = []
    for line in open(SplicingIndex_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        if ':' in line[0]:
            GeneLine = line[0].split(":")
            FeatureID = GeneLine[1]
        else:
            FeatureID = line[0]
        Gene = line[1]
        regcall = line[2]
        spl_index = line[3]
        pval = line[4]
        midas = line[5]
        S_I_data = SplicingIndexClass(regcall, spl_index, pval, midas)
        if(Gene == Selected_Gene):
            if platform != 'RNASeq':
                if FeatureID in probeset_to_ExonID:
                    FeatureID = probeset_to_ExonID[FeatureID]
                    #print(FeatureID)
                    ETC_List.append((FeatureID, S_I_data))
            else:
                try:
                    FeatureID = FeatureID.split("_")
                    FeatureID = FeatureID[0]         
                    ETC_List.append((FeatureID, S_I_data))
                except:
                    pass

    ETC_dict = {}
       
    # Link the exon regions to the Ensembl exons
    for transcriptID in Transcript_db:
        for exon_data in Transcript_db[transcriptID]:
            start = exon_data.Start()
            stop = exon_data.Stop()
            chr = exon_data.Chr()
            strand = exon_data.Strand()
            try:
                start_exon_region = exon_coord_db[chr,start,'start']
                stop_exon_region = exon_coord_db[chr,stop,'stop']
                proceed = True
            except Exception: ### Not clear why this error occurs. Erroring region was found to be an intron region start position (I7.2 ENSMUSG00000020385)
                proceed = False
            if proceed:
                if '-' in strand:
                    stop_exon_region,start_exon_region = start_exon_region,stop_exon_region
                regions = [start_exon_region]
                block,start_region = start_exon_region.split('.')
                start_region = int(float(start_region))
                block,stop_region = stop_exon_region.split('.')
                stop_region = int(float(stop_region))
                region = start_region+1
                while region<stop_region:
                    er = block+'.'+str(region)
                    regions.append(er)
                    region+=1
                if stop_region != start_region:
                    regions.append(stop_exon_region)
                for region in regions:
                    er = exonRegion_db[Selected_Gene,region]
                    try:
                        Transcript_ExonRegion_db[transcriptID].append(er)
                    except:
                        Transcript_ExonRegion_db[transcriptID] = [er]
    
    exon_virtualToRealPos= c.OrderedDict()
    junction_transcript_db = {}
    for transcriptID in Transcript_ExonRegion_db:
            #print('transcripts:',transcriptID)
            position=0
            Buffer=15
            for exon_object in Transcript_ExonRegion_db[transcriptID]:
                if position!=0:
                    if last_exon != exon_object.ExonBlock():
                        #print last_exon_region+'-'+exon_object.EnsemblRegion(),position,Buffer
                        junctionID = last_exon_region+'-'+exon_object.EnsemblRegion()
                        try: junction_transcript_db[transcriptID].append((position,position+Buffer, junctionID)) ### virtual junction positions
                        except: junction_transcript_db[transcriptID] = [(position,position+Buffer, junctionID)]
                        position+=Buffer
                        
                virtualStart = position
                virtualStop = virtualStart + exon_object.Length()
                position = virtualStop
                try:                    
                    exon_virtualToRealPos[transcriptID].append(([virtualStart,virtualStop],[exon_object.Start(), exon_object.Stop()],exon_object))
                except Exception:                    
                    exon_virtualToRealPos[transcriptID]=[([virtualStart,virtualStop],[exon_object.Start(), exon_object.Stop()],exon_object)]
                #print transcriptID,exon_object.ExonBlock(),exon_object.EnsemblExon(),exon_object.EnsemblRegion(),exon_object.Start(),exon_object.Stop(),virtualStart,virtualStop,"\n"
                last_exon = exon_object.ExonBlock()
                last_exon_region = exon_object.EnsemblRegion()

    for i in ETC_List:
        Region = i[0]
        S_I = i[1]
        Region = Region.split("-")
        if(len(Region) > 1):

            #Delete underscores from values.

            R_Start = Region[0]
            R_End = Region[1]
            R_Start = R_Start.split("_")
            R_End = R_End.split("_")
            R_Start = R_Start[0]
            R_End = R_End[0]
            R_Final = R_Start + "-" + R_End
            R_Type = R_Final[0]
            #print(R_Final)
            ETC_dict[R_Final] = S_I
            
        else:
            Region = Region[0]
            Region = Region.split("_")
            Region = Region[0]
            Region_type = Region[0]
            ETC_dict[Region] = S_I
            #if(Region_type == "E"):
            #    for entry in AllBlocks[0][1]:
            #        if(Region[1:] == entry):
            #            ETC_dict[("E" + entry)] = S_I
            #if(Region_type == "I"):
            #    for entry in AllBlocks[1][1]:    
            #        if(Region[1:] == entry):
            #            ETC_dict[("I" + entry)] = S_I

    #for a in ETC_dict:
    #        print(ETC_dict[a].RegCall(), a)
          
    #for i in junction_transcript_db:
    #    print i, junction_transcript_db[i], "\n"
    
    Protein_Pos_Db = {}
    last_protein=None
    stored_stop=None
    for line in open(Prt_Boundaries_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        proteinID = line[0]
        if(proteinID in Protein_List):
            Stop = int(line[-1])
            Start = int(line[-2])
            if(proteinID != last_protein):
                if stored_stop !=None:
                    #print proteinID,stored_start,stored_stop
                    Protein_Pos_Db[last_protein] = [[stored_start,stored_stop,None]]
                stored_start = int(Start)
            if(proteinID == last_protein):
                stored_stop = int(Stop)
            last_protein = str(proteinID)
    
    Protein_Pos_Db[last_protein] = [(stored_start,stored_stop,None)]
    Protein_virtualPos = RealToVirtual(Protein_Pos_Db, exon_virtualToRealPos, Protein_Transcript_db,Transcript_ExonRegion_db)
    
    Domain_Pos_Db={}
    domainAnnotation_db={}
    #"""
    for line in open(Prt_Regions_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        proteinID = line[0]
        if proteinID in Protein_Pos_Db:
            domain_start = int(float(line[3]))
            domain_stop = int(float(line[4]))
            domainID = line[-2]
            domainName = line[-1]
            try:
                Domain_Pos_Db[proteinID].append((domain_start,domain_stop,domainID))
            except:
                Domain_Pos_Db[proteinID] = [(domain_start,domain_stop,domainID)]
            domainAnnotation_db[domainID] = domainName

    #"""
    for line in open(UniPrt_Regions_File, "rU").xreadlines():
        line = line.rstrip()
        line = line.split("\t")
        proteinID = line[0]
        if proteinID in Protein_Pos_Db:
            domain_start = int(float(line[3]))
            domain_stop = int(float(line[4]))
            domainID = line[-1]
            domainName = line[-1]
            try:
                Domain_Pos_Db[proteinID].append((domain_start,domain_stop,domainID))
            except:
                Domain_Pos_Db[proteinID] = [(domain_start,domain_stop,domainID)]
            domainAnnotation_db[domainID] = domainName
            #print('--',domainName,domain_start,domain_stop)

    # Do the same for domain coordinates
    Domain_virtualPos = RealToVirtual(Domain_Pos_Db, exon_virtualToRealPos, Protein_Transcript_db,Transcript_ExonRegion_db)

    return_val = ((junction_transcript_db, Protein_virtualPos, Domain_virtualPos, Transcript_db, exon_virtualToRealPos, ETC_dict, microRNA_db, domainAnnotation_db))
    return return_val
Exemplo n.º 54
0
def performGroupNormalization(filename,export_dir,platform):
    expressionDataFormat,increment,convertNonLogToLog = ExpressionBuilder.checkExpressionFileFormat(filename)
    groups_dir = string.replace(export_dir,'exp.','batch.')
    fn=unique.filepath(filename); row_number=0; exp_db={}; relative_headers_exported = False
    group_db = importGroups(groups_dir)
    export_data = export.ExportFile(export_dir)
    for line in open(fn,'rU').xreadlines():
        data = ExpressionBuilder.cleanUpLine(line)
        t = string.split(data,'\t')
        if data[0]=='#' and row_number==0: row_number = 0
        elif row_number==0:
            sample_list = t[1:]
            new_sample_list = []
            for group in group_db:
                group_samples = group_db[group]
                try:
                    sample_index_list = map(lambda x: sample_list.index(x), group_samples)
                    group_db[group] = sample_index_list
                    new_sample_list+=group_samples
                except Exception:
                    missing=[]
                    for x in sample_list:
                        if x not in t[1:]: missing.append(x)
                    print 'missing:',missing
                    print t
                    print sample_list
                    print filename, groups_dir
                    print 'Unknown Error!!! Skipping cluster input file build (check column and row formats for conflicts)'; forceExit
            title = string.join([t[0]]+new_sample_list,'\t')+'\n' ### output the new sample order (group file order)
            export_data.write(title)
            row_number=1
        else:
            gene = t[0]
            if expressionDataFormat == 'non-log' and (convertNonLogToLog or platform == 'RNASeq'):
                ### Convert to log2 RPKM values - or counts
    
                try: all_values = map(lambda x: math.log(float(x)+increment,2), t[1:])
                except Exception:
                    all_values = ExpressionBuilder.logTransformWithNAs(t[1:],increment)
            else:
                try: all_values = map(float,t[1:])
                except Exception:
                    all_values = ExpressionBuilder.logTransformWithNAs(t[1:],increment)
            row_number+=1 ### Keep track of the first gene as to write out column headers for the relative outputs
            gene_log_folds = []

            for group in group_db:
                sample_index_list = group_db[group]
                ### Calculate log-fold values relative to the mean of all sample expression values
                try: values = map(lambda x: all_values[x], sample_index_list) ### simple and fast way to reorganize the samples
                except Exception:
                    print len(values), sample_index_list;kill
                try: avg = statistics.avg(values)
                except Exception:
                    values2=[]
                    for v in values:
                        try: values2.append(float(v))
                        except Exception: pass
                    values = values2
                    try: avg = statistics.avg(values)
                    except Exception:
                        if len(values)>0: avg = values[0]
                        else: avg = 0
                try: log_folds = map(lambda x: (x-avg), values)
                except Exception: 
                    log_folds=[]
                    for x in values:
                        try: log_folds.append(x-avg)
                        except Exception: log_folds.append('')
                gene_log_folds+=log_folds                            
            gene_log_folds = map(lambda x: str(x),gene_log_folds)
            export_data.write(string.join([gene]+gene_log_folds,'\t')+'\n')
    export_data.close()
Exemplo n.º 55
0
def remoteSashimiPlot(Species,
                      fl,
                      bamdir,
                      eventsToVisualizeFilename,
                      events=None,
                      show=False):
    global PSIFilename
    global outputdir
    global root_dir
    global steady_state_exp_file
    global species
    species = Species

    try:
        countinp = fl.CountsFile()
        root_dir = fl.RootDir()
    except Exception:
        root_dir = fl
        search_dir = root_dir + '/ExpressionInput'
        files = unique.read_directory(search_dir)
        for file in files:
            if 'counts.' in file and 'steady-state.txt' not in file:
                countinp = search_dir + '/' + file

    PSIFilename = root_dir + '/AltResults/AlternativeOutput/' + species + '_RNASeq_top_alt_junctions-PSI.txt'

    import ExpressionBuilder
    dir_list = unique.read_directory(root_dir + '/ExpressionInput')
    for file in dir_list:
        if 'exp.' in file and 'steady-state' not in file:
            exp_file = root_dir + '/ExpressionInput/' + file
        elif 'exp.' in file and 'steady-state' in file:
            steady_state_exp_file = root_dir + '/ExpressionInput/' + file
    global sample_group_db
    sample_group_db = ExpressionBuilder.simplerGroupImport(exp_file)

    #outputdir=findParentDir(PSIFilename)+"sashimiplots"
    outputdir = root_dir + '/ExonPlots'
    outputdir = root_dir + '/SashimiPlots'
    try:
        os.mkdir(unique.filepath(outputdir))
    except Exception:
        pass

    if show:
        s = open(outputdir + '/show.txt', 'w')
        s.write('TRUE')
        s.close()
    else:
        s = open(outputdir + '/show.txt', 'w')
        s.write('FALSE')
        s.close()

    geneSymbol_db = Sashimiplottting(bamdir,
                                     countinp,
                                     PSIFilename,
                                     eventsToVisualizeFilename,
                                     events=events)
    for filename in os.listdir(outputdir):
        if '.pdf' in filename or '.png' in filename:
            fn = string.replace(filename, '.pdf', '')
            fn = string.replace(fn, '.png', '')
            newname = string.split(fn, '__')
            if newname[0] in geneSymbol_db:
                new_filename = str(filename)
                if '__' in filename:
                    new_filename = string.split(filename, '__')[1]
                elif '\\' in filename:
                    new_filename = string.split(filename, '\\')[1]
                elif '/' in filename:
                    new_filename = string.split(filename, '/')[1]
                nnname = geneSymbol_db[
                    newname[0]][0] + '-SashimiPlot_' + new_filename
                try:
                    os.rename(os.path.join(outputdir, filename),
                              os.path.join(outputdir, nnname))
                except Exception:
                    if 'already exists' in traceback.format_exc():
                        ### File already exists, delete the new one
                        try:
                            os.remove(os.path.join(outputdir, nnname))
                        except Exception:
                            pass
                        ### Now write the new one
                        try:
                            os.rename(os.path.join(outputdir, filename),
                                      os.path.join(outputdir, nnname))
                        except Exception:
                            pass
                    pass
            else:
                continue
    print ''
Exemplo n.º 56
0
                     "iconfile": "Viewer.icns"}
        }
        setup(name=_appName,
                        app=[_script],
                        version=_appVersion,
                        description=_appDescription,
                        author=_authorName,
                        author_email=_authorEmail,
                        url=_authorURL,
                        options=options,
                        #data_files=data_files,
                        setup_requires=["py2app"]
        )

	import unique, shutil
	root_path = unique.filepath('')
	software_path = root_path+'/dist/AltAnalyzeViewer.app/Contents/Frameworks/Tcl.framework'
	shutil.rmtree(software_path)
	software_path = root_path+'/dist/AltAnalyzeViewer.app/Contents/Frameworks/Tk.framework'
	shutil.rmtree(software_path)
	software_path = root_path+'/dist/AltAnalyzeViewer.app/Contents/Resources/mpl-data/sample_data'
	shutil.rmtree(software_path)
	software_path = root_path+'/dist/AltAnalyzeViewer.app/Contents/Resources/lib/python2.7/matplotlib/tests'
	shutil.rmtree(software_path)

if sys.platform.startswith("win"):
        ### example command: python setup.py py2exe
        from distutils.core import setup
        import py2exe
        import suds
        import numpy
Exemplo n.º 57
0
def filepath(filename):
    fn = unique.filepath(filename)
    return fn
Exemplo n.º 58
0
def update_plot_settings(bamdir,list1,list2,samp):
    export_pl=open(unique.filepath('Config/sashimi_plot_settings.txt'),'w')
    export_pl.write('[data]')
    export_pl.write('\n')
    export_pl.write('bam_prefix = '+bamdir+'\n')
    export_pl.write('bam_files =[')
  
    for i in range(len(list1)):
        g=samp[list1[i]].replace('.bed','.bam')
	#print i
        if i==len(list1)-1 and len(list2)==0:
            export_pl.write('"'+g+'"]')
        else:
            export_pl.write('"'+g+'",')      
    for j in range(len(list2)):
	#print j
        g=samp[list2[j]].replace('.bed','.bam')
        export_pl.write('"'+g+'"')
        if j==len(list2)-1:
            export_pl.write(']')
        else:
            export_pl.write(',')
    
	
    export_pl.write('\n')
    export_pl.write('[plotting]')
    export_pl.write('\n') 
    export_pl.write('fig_width = 7 \nfig_height = 7 \nintron_scale = 30 \nexon_scale = 4 \nlogged = False\n')
    export_pl.write('font_size = 6 \nbar_posteriors = False \nnyticks = 4 \nnxticks = 4 \n')
    export_pl.write('show_ylabel = False \nshow_xlabel = True \nshow_posteriors = False \nnumber_junctions = True \n')
    export_pl.write('resolution = .5 \nposterior_bins = 40 \ngene_posterior_ratio = 5 \n')
    export_pl.write('colors =[')
    for i in range(len(list1)):
        export_pl.write('"'+'red'+'"')
        if i==len(list1)-1 and len(list2)==0:
            export_pl.write(']')
        else:
            export_pl.write(',')
    for j in range(len(list2)):
        export_pl.write('"'+'blue'+'"')
        if j==len(list2)-1:
            export_pl.write(']')
        else:
            export_pl.write(',')
    export_pl.write('\n')       
    export_pl.write('coverages =[')
    for i in range(len(list1)):
        
        e=sample_read[samp[list1[i]]]
        export_pl.write(str(int(e)))
        if i==len(list1)-1 and len(list2)==0:
            export_pl.write(']')
        else:
            export_pl.write(',')
    for j in range(len(list2)):
        e=sample_read[samp[list2[j]]]
        export_pl.write(str(int(e)))
        if j==len(list2)-1:
            export_pl.write(']')
        else:
            export_pl.write(',')
    export_pl.write('\n')
    export_pl.write('bar_color = "b" \nbf_thresholds = [0, 1, 2, 5, 10, 20]')
    export_pl.close()
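A hypothetical call sketch for update_plot_settings above, assuming the module-level sample_read dictionary has already been populated with per-sample read counts (as done elsewhere in this module); the BAM directory, sample names, and counts below are made up for illustration only.

# Hypothetical invocation: sample names, read counts, and the BAM directory
# are illustrative. list1/list2 hold sample indexes for the two plot groups.
sample_read['cond1_rep1.bed'] = 1.2e6
sample_read['cond1_rep2.bed'] = 0.9e6
sample_read['cond2_rep1.bed'] = 1.1e6
samples = ['cond1_rep1.bed', 'cond1_rep2.bed', 'cond2_rep1.bed']
update_plot_settings('/path/to/bam/files/', [0, 1], [2], samples)
# Writes Config/sashimi_plot_settings.txt with red entries for list1 samples,
# blue entries for list2 samples, and per-sample coverage values.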