# Example #1
0
def grabExonIntronPromoterSequences(species,array_type,data_type,output_types):
    """Export sequence files for every result file in the 'sequence_input' directory.

    For each file returned by read_directory() the identifiers are loaded with
    inputResultFiles(), matched against the species' Ensembl probeset file,
    annotated with gene sequence data and then written out once per output type.

    species      -- species code used to build the AltDatabase paths
    array_type   -- platform name used to build the input, output and database paths
    data_type    -- 'probeset' (reads from /AltResults/AlternativeOutput/...) or
                    'gene' (reads from /ExpressionOutput/...)
    output_types -- a single output type name, or 'all'. With data_type 'probeset',
                    'all' expands to alt-promoter, promoter, exon, adjacent-exons
                    and adjacent-introns. Any other value is used as-is as the one
                    output type (including 'all' when data_type is 'gene').

    Output files are named <input file minus its last 4 characters>-<data_type>-<output_type>.txt
    and are placed in the matching 'sequence_output' directory. Nothing is returned.

    Raises ValueError when data_type is neither 'probeset' nor 'gene'.
    """
    if data_type == 'probeset':
        sequence_input_dir = '/AltResults/AlternativeOutput/'+array_type+'/sequence_input'
    elif data_type == 'gene':
        sequence_input_dir = '/ExpressionOutput/'+array_type+'/sequence_input'
    else:
        ### Previously this fell through and failed later with an unbound-variable error
        raise ValueError("data_type must be 'probeset' or 'gene', got %r" % (data_type,))

    ### Resolve the requested output types ONCE and under a separate name. Rebinding
    ### output_types inside the per-file loop wrapped the list again on every pass
    ### ([x] -> [[x]]), so building the filename failed from the second input file on.
    if output_types == 'all' and data_type == 'probeset':
        requested_types = ['alt-promoter','promoter','exon','adjacent-exons','adjacent-introns']
    else:
        requested_types = [output_types]

    ### None of these depend on the input file, so build them once
    export_exon_filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_probesets.txt'
    gene_seq_filename = 'AltDatabase/ensembl/'+species+'/'+species+'_gene-seq-2000_flank'
    sequence_output_dir = sequence_input_dir.replace('_input','_output')
    analysis_type = 'get_sequence'

    for input_file in read_directory(sequence_input_dir):
        filedir = sequence_input_dir[1:]+'/'+input_file ### [1:] drops the leading '/'
        filter_db = inputResultFiles(filedir,data_type)
        ensembl_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,data_type,filter_db)
        ensembl_probeset_db = EnsemblImport.import_sequence_data(gene_seq_filename,ensembl_probeset_db,species,analysis_type)

        for output_type in requested_types:
            filename = sequence_output_dir[1:]+'/'+input_file[:-4]+'-'+data_type+'-'+output_type+'.txt'
            exportExonIntronPromoterSequences(filename,ensembl_probeset_db,data_type,output_type)
# Example #2
0
def displayExpressionGraph(species,
                           Platform,
                           exp_file,
                           gene,
                           transpose,
                           display=True,
                           showIntrons=False,
                           analysisType='plot'):
    """Resolve the requested genes and hand their exon-level data to importTableEntries.

    species      -- species code used to build the AltDatabase paths
    Platform     -- platform name; stored in the module-level global `platform`.
                    'AltMouse' selects the platform-specific annotation file and
                    'RNASeq' selects the '_Ensembl_exons.txt' feature file
                    (every other platform uses '_Ensembl_probesets.txt').
    exp_file     -- expression file path. The part before 'AltResults' (or
                    otherwise 'ExpressionInput') is the parent of the 'ExonPlots/'
                    output folder.
    gene         -- one or more gene identifiers separated by '|', ' ' or ','.
                    Identifiers containing 'ENS' are looked up as gene IDs and
                    everything else as gene symbols.
    transpose, display, showIntrons, analysisType
                 -- forwarded unchanged to importTableEntries.

    Raises NameError when none of the requested genes can be resolved. That is the
    exception type the previous undefined-name sentinel produced.
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
    else:
        gene_annotation_file = "AltDatabase/" + species + "/" + platform + "/" + platform + "_gene_annotations.txt"

    ### Normalise every supported separator to ',' and drop the blank tokens that
    ### inputs such as "A, B" produce (they were reported as " not in database").
    normalized = gene.replace('|', ',').replace(' ', ',')
    requested = [token for token in normalized.split(',') if token]

    ### One annotation table per key mode. A single shared table was keyed by
    ### whatever the FIRST gene needed, so mixed ID/symbol lists failed for the rest.
    annotation_tables = {}
    gene_db = {}
    for query in requested:
        ### NOTE(review): a gene symbol that itself contains 'ENS' is treated as an ID
        by_symbol = 'ENS' not in query
        try:
            if by_symbol not in annotation_tables:
                annotation_tables[by_symbol] = ExonAnalyze_module.import_annotations(
                    gene_annotation_file, platform,
                    keyBySymbol=by_symbol)  ### Make an SQLite call
            annotate_db = annotation_tables[by_symbol]
            if by_symbol:
                gene_symbol = query
                gene_id = annotate_db[query].GeneID()
            else:
                gene_id = query
                gene_symbol = annotate_db[query].Symbol()
            gene_db[gene_id] = gene_symbol
        except Exception:
            ### Kept broad on purpose: any lookup/import problem is reported per gene
            print('%s not in database' % (query,))

    if not gene_db:
        ### Explicit replacement for the bare undefined name used to abort here
        raise NameError(
            'force_no_gene_found_error: none of the requested genes were found in the annotation database'
        )

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0] + 'ExonPlots/'

    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/' + species + '/' + platform + '/' + species + '_Ensembl_' + datatype + '.txt'
    if verifyFileLength(export_exon_filename) == 0:
        ### Fall back to a copy of the database located next to the results
        rootdir = root_dir.replace('ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets',
        gene_db)  ### Make an SQLite call

    ### Collect the feature IDs whose rows should be read from the expression file
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (_index, _exon_data, feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try:
        os.mkdir(root_dir)
    except OSError:
        pass  ### typically: dir exists
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    importTableEntries(exp_file,
                       filter_db,
                       ensembl_exon_db,
                       gene_db,
                       root_dir,
                       transpose,
                       display,
                       showIntrons,
                       analysisType=analysisType)  ### Make an SQLite call
# Example #3
0
def displayExpressionGraph(species,Platform,exp_file,gene,transpose,display=True,showIntrons=False,analysisType='plot'):
    """Resolve the requested genes and hand their exon-level data to importTableEntries.

    species      -- species code used to build the AltDatabase paths
    Platform     -- platform name; stored in the module-level global `platform`.
                    'AltMouse' selects the platform-specific annotation file and
                    'RNASeq' selects the '_Ensembl_exons.txt' feature file
                    (every other platform uses '_Ensembl_probesets.txt').
    exp_file     -- expression file path. The part before 'AltResults' (or
                    otherwise 'ExpressionInput') is the parent of the 'ExonPlots/'
                    output folder.
    gene         -- one or more gene identifiers separated by '|', ' ' or ','.
                    Identifiers containing 'ENS' are looked up as gene IDs and
                    everything else as gene symbols. Identifiers that cannot be
                    resolved are skipped without a message.
    transpose, display, showIntrons, analysisType
                 -- forwarded unchanged to importTableEntries.

    Raises NameError when none of the requested genes can be resolved. That is the
    exception type the previous undefined-name sentinel produced.
    """
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print('Importing exon-level expression data for visualization (be patient)...')
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse': gene_annotation_file = "AltDatabase/ensembl/"+species+"/"+species+"_Ensembl-annotations.txt"
    else: gene_annotation_file = "AltDatabase/"+species+"/"+platform+"/"+platform+"_gene_annotations.txt"

    ### Normalise every supported separator to ',' and drop the blank tokens that
    ### inputs such as "A, B" produce, so no lookup is attempted for them.
    normalized = gene.replace('|',',').replace(' ',',')
    requested = [token for token in normalized.split(',') if token]

    ### One annotation table per key mode. A single shared table was keyed by
    ### whatever the FIRST gene needed, so mixed ID/symbol lists failed for the rest.
    annotation_tables={}
    gene_db={}
    for query in requested:
        ### NOTE(review): a gene symbol that itself contains 'ENS' is treated as an ID
        by_symbol = 'ENS' not in query
        try:
            if by_symbol not in annotation_tables:
                annotation_tables[by_symbol] = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=by_symbol) ### Make an SQLite call
            annotate_db = annotation_tables[by_symbol]
            if by_symbol:
                gene_symbol = query
                gene_id = annotate_db[query].GeneID()
            else:
                gene_id = query
                gene_symbol = annotate_db[query].Symbol()
            gene_db[gene_id]=gene_symbol
        except Exception:
            ### Deliberately silent, as before; an empty gene_db is reported below
            pass

    if not gene_db:
        ### Explicit replacement for the bare undefined name used to abort here
        raise NameError('force_no_gene_found_error: none of the requested genes were found in the annotation database')

    if 'AltResults' in exp_file:
        root_dir = exp_file.split('AltResults')[0]+'ExonPlots/'
    else:
        root_dir = exp_file.split('ExpressionInput')[0]+'ExonPlots/'

    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/'+species+'/'+platform+'/'+species+'_Ensembl_'+datatype+'.txt'
    if verifyFileLength(export_exon_filename) == 0:
        ### Fall back to a copy of the database located next to the results
        rootdir = root_dir.replace('ExonPlots/','')
        export_exon_filename = rootdir+'/'+export_exon_filename

    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,'gene-probesets',gene_db) ### Make an SQLite call

    ### Collect the feature IDs whose rows should be read from the expression file
    filter_db = {}
    for gene_id in ensembl_exon_db:
        ensembl_exon_db[gene_id].sort()
        for (_index,_exon_data,feature_id) in ensembl_exon_db[gene_id]:
            filter_db[feature_id] = []

    try: os.mkdir(root_dir)
    except OSError: pass ### typically: dir exists
    print('Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.')
    importTableEntries(exp_file,filter_db,ensembl_exon_db,gene_db,root_dir,transpose,display,showIntrons,analysisType=analysisType) ### Make an SQLite call
def getJunctionComparisonsFromExport(species,array_type):
    """Write AltDatabase/<species>/<array_type>/<species>_junction_comps.txt.

    Junction probesets (names containing '|5' or '|3') are grouped by their name
    minus the last two characters and paired into '<left region>-<right region>'
    junction IDs. For every (gene, critical_exon, inclusion junction, exclusion
    junction) entry from importEnsemblUCSCAltJunctions whose two junctions both
    have probesets, tab-delimited rows are emitted:

      * exclusion-junction probeset vs. each probeset of the critical exon, only
        when neither junction probeset is flagged as not aligning to the
        annotated region boundaries;
      * exclusion-junction probeset vs. inclusion-junction probeset, with the
        junction name rewritten by renameJunction for flagged probesets.

    Rows are de-duplicated with unique.unique before being written. Nothing is returned.

    Raises NameError when a non-junction probeset has a region ID containing '_'
    (the same exception type the previous undefined-name sentinel produced).
    """
    junction_type = 'standard'
    gene_junction_db = importEnsemblUCSCAltJunctions(species,junction_type)

    ### Retrieve probesets with exon-junctions associated - these are critical exons
    filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_'+array_type+'_probesets.txt'
    gene_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(filename,'junctions',{})
    left={}; right={}; gene_db={}; gene_exon_db={}; nonjunction_aligning={}
    for gene in gene_probeset_db:
        for (probe_data,ed) in gene_probeset_db[gene]:
            probeset, strand, probeset_start, probeset_stop = probe_data
            original_region_id = ed.RegionNumber().replace('-','.')
            region_ids = original_region_id.split('|')
            junction_key = probeset[:-2] ### probeset name without its 2-character suffix
            gene_db[junction_key]=gene
            r_starts=ed.ExonStart().split('|'); r_stops=ed.ExonStop().split('|')
            for region_id in region_ids:
                if '|5' in probeset:
                    left.setdefault(junction_key,[]).append(region_id)
                    ### If the junction probesets DO NOT align to the region coordinates, then the probeset maps to a junction outside the database
                    if strand == '+':
                        if probeset_stop not in r_stops: nonjunction_aligning[junction_key] = original_region_id+'_'+probeset_stop,'left'
                    elif probeset_start not in r_starts: nonjunction_aligning[junction_key] = original_region_id+'_'+probeset_start,'left'
                elif '|3' in probeset:
                    right.setdefault(junction_key,[]).append(region_id)
                    if strand == '+':
                        if probeset_start not in r_starts: nonjunction_aligning[junction_key] = original_region_id+'_'+probeset_start,'right'
                    elif probeset_stop not in r_stops: nonjunction_aligning[junction_key] = original_region_id+'_'+probeset_stop,'right'
                else:
                    if '_' in region_id:
                        ### Explicit replacement for the bare undefined name used to abort here
                        raise NameError('killer: unexpected "_" in exon region ID %s' % (region_id,))
                    gene_exon_db.setdefault((gene,region_id),[]).append(probeset)

    print('len(nonjunction_aligning) %d' % len(nonjunction_aligning))
    gene_exon_db = eliminateRedundant(gene_exon_db)
    junction_db={} ### Get the exon-region IDs for an exon-junction
    for probeset in left:
        gene = gene_db[probeset]
        if probeset in right:
            for region1 in left[probeset]:
                for region2 in right[probeset]:
                    junction = region1+'-'+region2
                    junction_db.setdefault((gene,junction),[]).append(probeset)

    probeset_junction_export = 'AltDatabase/' + species + '/'+array_type+'/'+ species + '_junction_comps.txt'

    fn=filepath(probeset_junction_export)
    print('Exporting %s' % (probeset_junction_export,))
    title = 'gene'+'\t'+'critical_exon'+'\t'+'exclusion_junction_region'+'\t'+'inclusion_junction_region'+'\t'+'exclusion_probeset'+'\t'+'inclusion_probeset'+'\t'+'data_source'+'\n'
    temp_list=[]

    for (gene,critical_exon,incl_junction,excl_junction) in gene_junction_db:
        if (gene,incl_junction) not in junction_db or (gene,excl_junction) not in junction_db: continue
        ### A critical exon without probesets simply contributes no exon-level rows
        try: exon_probesets = gene_exon_db[gene,critical_exon]
        except KeyError: exon_probesets = []
        for incl_junction_probeset in junction_db[gene,incl_junction]:
            incl_misaligned = incl_junction_probeset in nonjunction_aligning
            for excl_junction_probeset in junction_db[gene,excl_junction]:
                excl_misaligned = excl_junction_probeset in nonjunction_aligning
                if not (incl_misaligned or excl_misaligned): ### Ensure the probeset DOES map to the annotated junctions
                    for incl_exon_probeset in exon_probesets:
                        temp_list.append('\t'.join([gene,critical_exon,excl_junction,critical_exon,excl_junction_probeset,incl_exon_probeset,'AltAnalyze'])+'\n')
                ### Rename into per-pair copies. Overwriting incl_junction/excl_junction
                ### here leaked one probeset's renamed junction into every later probeset
                ### pair of the same comparison (and into the exon-level rows above).
                pair_incl_junction = incl_junction
                pair_excl_junction = excl_junction
                if incl_misaligned:
                    new_region_id, side = nonjunction_aligning[incl_junction_probeset]
                    pair_incl_junction = renameJunction(incl_junction,side,new_region_id)
                if excl_misaligned:
                    new_region_id, side = nonjunction_aligning[excl_junction_probeset]
                    pair_excl_junction = renameJunction(excl_junction,side,new_region_id)
                if excl_junction_probeset!=incl_junction_probeset:
                    temp_list.append('\t'.join([gene,critical_exon,pair_excl_junction,pair_incl_junction,excl_junction_probeset,incl_junction_probeset,'AltAnalyze'])+'\n')
    temp_list = unique.unique(temp_list)

    ### The file is only opened once all rows exist, and `with` closes the handle
    ### even if a write fails (an earlier failure used to leave it open and truncated).
    with open(fn,'w') as data:
        data.write(title)
        for row in temp_list: data.write(row)
    print('Number of compared junctions exported %d' % len(temp_list))