def getAnnotations(Species,array_type,reannotate_exon_seq,force):
    """Annotate Affymetrix exon array data using files Ensembl data (sync'ed to genome release)."""
    global species; species = Species; global test; global test_cluster
    test = 'no'; test_cluster = ['TC0701360']; data_type = 'mRNA'

    global ensembl_exon_db; global ensembl_exon_db; global exon_clusters; global exon_region_db
    ensembl_exon_db,ensembl_annot_db,exon_clusters,intron_clusters,exon_region_db,intron_retention_db,ucsc_splicing_annot_db,ens_transcript_db = EnsemblImport.getEnsemblAssociations(species,data_type,test)
    ensembl_probeset_db = importCriticalExonLocations(species,array_type,ensembl_exon_db,force) ###Get Pre-computed genomic locations for critical exons
    ensembl_probeset_db = ExonArrayEnsemblRules.annotateExons(ensembl_probeset_db,exon_clusters,ensembl_exon_db,exon_region_db,intron_retention_db,intron_clusters,ucsc_splicing_annot_db); constitutive_gene_db={}
    ExonArrayEnsemblRules.exportEnsemblLinkedProbesets(array_type,ensembl_probeset_db,species)
    print "\nCritical exon data exported coordinates, exon associations and splicing annotations exported..."
    
    ### Change filenames to reflect junction array type
    export_filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_probesets.txt'; ef=filepath(export_filename)
    export_replacement = string.replace(export_filename,'_probe','_'+array_type+'_probe')
    er=filepath(export_replacement); shutil.copyfile(ef,er); os.remove(ef) ### Copy file to a new name

    ### Export full exon seqeunce for probesets/critical exons to replace the original incomplete sequence (used for miRNA analyses)
    if reannotate_exon_seq == 'yes':
        JunctionArray.reAnnotateCriticalExonSequences(species,array_type)
Пример #2
0
def buildExonArrayExonAnnotations(species, array_type, force):
    """Build the UCSC annotation files, then (re)build the exon-array annotation databases.

    species -- species code passed to both build steps.
    array_type -- 'gene' selects the 'gene' source biotype; anything else uses 'mRNA'.
    force -- passed through unchanged to buildUCSCAnnoationFiles.

    Nothing is returned; the tables produced by the Ensembl step are discarded.
    """
    ### Get UCSC associations (download databases if necessary)
    ### Arguments: mRNA type 'mrna', export_all_associations 'no' (YES only for
    ### protein prediction analysis), run_from_scratch 'yes'
    buildUCSCAnnoationFiles(species, 'mrna', 'no', 'yes', force)

    import ExonArrayEnsemblRules
    reload(ExonArrayEnsemblRules)

    if array_type == 'gene':
        biotype = 'gene'
    else:
        biotype = 'mRNA'

    ### Build the databases from scratch with the default constitutive source;
    ### the returned variables are unpacked but not used here
    annotation_tables = ExonArrayEnsemblRules.getAnnotations('yes', 'default', biotype, species)
    probeset_db, annotate_db, constitutive_gene_db, splicing_analysis_db = annotation_tables
Пример #3
0
def buildExonArrayExonAnnotations(species, array_type, force):
    """Run the UCSC annotation build followed by the Ensembl exon-array annotation build.

    species -- species code handed to both builders.
    array_type -- when equal to 'gene' the Ensembl build uses the 'gene'
        biotype, otherwise 'mRNA'.
    force -- forwarded as-is to buildUCSCAnnoationFiles.

    The function has no return value; the four tables returned by the Ensembl
    build are unpacked and then dropped.
    """
    ### Settings for the UCSC step (download databases if necessary)
    ucsc_options = dict(
        mrna_kind='mrna',
        export_everything='no',  ### YES only for protein prediction analysis
        from_scratch='yes')
    buildUCSCAnnoationFiles(species, ucsc_options['mrna_kind'],
                            ucsc_options['export_everything'],
                            ucsc_options['from_scratch'], force)

    import ExonArrayEnsemblRules
    reload(ExonArrayEnsemblRules)

    ### Settings for the Ensembl step
    rebuild = 'yes'
    constitutive = 'default'
    biotype = 'gene' if array_type == 'gene' else 'mRNA'

    ### Build the databases and return the variables (not used here)
    (probeset_db, annotate_db, constitutive_gene_db,
     splicing_analysis_db) = ExonArrayEnsemblRules.getAnnotations(
         rebuild, constitutive, biotype, species)
Пример #4
0
def grabExonIntronPromoterSequences(species,array_type,data_type,output_types):
    """Export sequences for the features listed in each file of the sequence_input folder.

    species -- species code used to locate the Ensembl probeset and gene sequence files.
    array_type -- platform name used in the input/output and database paths.
    data_type -- 'probeset' (reads /AltResults/AlternativeOutput/<array_type>/sequence_input)
        or 'gene' (reads /ExpressionOutput/<array_type>/sequence_input).
    output_types -- a single output type name, or 'all'. With data_type
        'probeset', 'all' expands to alt-promoter, promoter, exon,
        adjacent-exons and adjacent-introns; otherwise the value is used as
        the one output type.

    For every input file and every output type one file named
    '<input file minus its last 4 characters>-<data_type>-<output_type>.txt' is
    written to the matching 'sequence_output' folder through
    exportExonIntronPromoterSequences.

    Raises ValueError for an unsupported data_type (previously this surfaced
    as an unbound-variable error).
    """
    if data_type == 'probeset': sequence_input_dir = '/AltResults/AlternativeOutput/'+array_type+'/sequence_input'
    elif data_type == 'gene': sequence_input_dir = '/ExpressionOutput/'+array_type+'/sequence_input'
    else: raise ValueError("data_type must be 'probeset' or 'gene', got %r" % (data_type,))

    ### Resolve the requested output types ONCE. The original rebound `output_types`
    ### inside the per-file loop, so from the second input file onwards the value was a
    ### list wrapped in another list and building the output filename failed.
    if output_types == 'all' and data_type == 'probeset':
        requested_types = ['alt-promoter','promoter','exon','adjacent-exons','adjacent-introns']
    else: requested_types = [output_types]

    ### These values do not depend on the input file, so compute them outside the loop
    sequence_output_dir = string.replace(sequence_input_dir,'_input','_output')
    export_exon_filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_probesets.txt'
    gene_seq_filename = 'AltDatabase/ensembl/'+species+'/'+species+'_gene-seq-2000_flank'
    analysis_type = 'get_sequence'

    dir_list = read_directory(sequence_input_dir)
    for input_file in dir_list:
        ### The leading '/' is dropped to form the path handed to the importer
        filedir = sequence_input_dir[1:]+'/'+input_file
        filter_db = inputResultFiles(filedir,data_type)
        ensembl_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,data_type,filter_db)
        ensembl_probeset_db = EnsemblImport.import_sequence_data(gene_seq_filename,ensembl_probeset_db,species,analysis_type)

        for output_type in requested_types:
            filename = sequence_output_dir[1:]+'/'+input_file[:-4]+'-'+data_type+'-'+output_type+'.txt'
            exportExonIntronPromoterSequences(filename, ensembl_probeset_db,data_type,output_type)
Пример #5
0
def displayExpressionGraph(species,
                           Platform,
                           exp_file,
                           gene,
                           transpose,
                           display=True,
                           showIntrons=False,
                           analysisType='plot'):
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print 'Importing exon-level expression data for visualization (be patient)...'
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse':
        gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
    else:
        gene_annotation_file = "AltDatabase/" + species + "/" + platform + "/" + platform + "_gene_annotations.txt"

    genes = []
    gene = string.replace(gene, '|', ',')
    gene = string.replace(gene, ' ', ',')
    if ',' in gene:
        genes += string.split(gene, ',')
    else:
        genes.append(gene)
    gene_db = {}
    for gene in genes:
        try:
            if 'ENS' in gene:
                try:
                    annotate_db  ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform,
                        keyBySymbol=False)  ### Make an SQLite call
                gene_symbol = annotate_db[gene].Symbol()
            else:
                try:
                    annotate_db  ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(
                        gene_annotation_file, platform, keyBySymbol=True)
                gene_symbol = gene
                gene = annotate_db[gene].GeneID()
            gene_db[gene] = gene_symbol
        except Exception:
            print gene, 'not in database'

    if len(gene_db) == 0:
        force_no_gene_found_error
    if 'AltResults' in exp_file:
        root_dir = string.split(exp_file, 'AltResults')[0] + 'ExonPlots/'
    else:
        root_dir = string.split(exp_file, 'ExpressionInput')[0] + 'ExonPlots/'

    import ExonAnalyze_module
    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/' + species + '/' + platform + '/' + species + '_Ensembl_' + datatype + '.txt'
    if verifyFileLength(export_exon_filename) == 0:
        rootdir = string.replace(root_dir, 'ExonPlots/', '')
        export_exon_filename = rootdir + '/' + export_exon_filename

    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(
        export_exon_filename, 'gene-probesets',
        gene_db)  ### Make an SQLite call

    filter_db = {}
    for gene in ensembl_exon_db:
        ensembl_exon_db[gene].sort()
        for (index, ed, id) in ensembl_exon_db[gene]:
            filter_db[id] = []

    try:
        os.mkdir(root_dir)
    except Exception:
        None  ### dir exists
    print 'Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.'
    importTableEntries(exp_file,
                       filter_db,
                       ensembl_exon_db,
                       gene_db,
                       root_dir,
                       transpose,
                       display,
                       showIntrons,
                       analysisType=analysisType)  ### Make an SQLite call
Пример #6
0
def displayExpressionGraph(species,Platform,exp_file,gene,transpose,display=True,showIntrons=False,analysisType='plot'):
    ### Get gene annotations (users can provide an Ensembl or symbol)
    print 'Importing exon-level expression data for visualization (be patient)...'
    import ExonAnalyze_module
    global platform
    platform = Platform
    if platform != 'AltMouse': gene_annotation_file = "AltDatabase/ensembl/"+species+"/"+species+"_Ensembl-annotations.txt"
    else: gene_annotation_file = "AltDatabase/"+species+"/"+platform+"/"+platform+"_gene_annotations.txt"

    genes=[]
    gene=string.replace(gene,'|',',')
    gene=string.replace(gene,' ',',')
    if ',' in gene:
        genes += string.split(gene,',')
    else: genes.append(gene)    
    gene_db={}
    for gene in genes:
        try:
            if 'ENS' in gene:
                try: annotate_db ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=False) ### Make an SQLite call
                gene_symbol = annotate_db[gene].Symbol()
            else:
                try: annotate_db ### If variable is defined
                except Exception:
                    annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file,platform,keyBySymbol=True)
                gene_symbol = gene
                gene = annotate_db[gene].GeneID()
            gene_db[gene]=gene_symbol
        except Exception:
            #if len(gene)>0: print gene, 'not in database'
            pass
        
    if len(gene_db)==0:
        force_no_gene_found_error
    if 'AltResults' in exp_file:
        root_dir = string.split(exp_file,'AltResults')[0]+'ExonPlots/'
    else:
        root_dir = string.split(exp_file,'ExpressionInput')[0]+'ExonPlots/'
        
    import ExonAnalyze_module
    if platform == 'RNASeq': datatype = 'exons'
    else: datatype = 'probesets'
    export_exon_filename = 'AltDatabase/'+species+'/'+platform+'/'+species+'_Ensembl_'+datatype+'.txt'
    if verifyFileLength(export_exon_filename) == 0:
        rootdir = string.replace(root_dir,'ExonPlots/','')
        export_exon_filename = rootdir+'/'+export_exon_filename
    
    import ExonArrayEnsemblRules
    ensembl_exon_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(export_exon_filename,'gene-probesets',gene_db) ### Make an SQLite call
    
    filter_db = {}
    for gene in ensembl_exon_db:
        ensembl_exon_db[gene].sort()
        
        for (index,ed,id) in ensembl_exon_db[gene]:
            filter_db[id] = []
            
    try: os.mkdir(root_dir)
    except Exception: None ### dir exists
    print 'Image results being saved to the folder "ExonPlots" in the AltAnalyze results directory.'
    importTableEntries(exp_file,filter_db,ensembl_exon_db,gene_db,root_dir,transpose,display,showIntrons,analysisType=analysisType) ### Make an SQLite call
def getJunctionComparisonsFromExport(species,array_type):
    """Export inclusion/exclusion probeset comparisons to '<species>_junction_comps.txt'.

    species -- species code used in the database paths and passed to importEnsemblUCSCAltJunctions.
    array_type -- platform name used in the database paths and file names.

    The function:
      1. Loads the known junction comparisons with importEnsemblUCSCAltJunctions(species,'standard');
         the result is iterated as (gene, critical_exon, incl_junction, excl_junction) tuples.
      2. Re-imports the probesets from '<species>_Ensembl_<array_type>_probesets.txt' and sorts them
         into junction halves (IDs containing '|5' or '|3') and the remaining (non-junction) probesets.
      3. Pairs the two halves of each junction probeset into 'region1-region2' junction names.
      4. Writes one tab-delimited row per compared probeset pair (duplicates removed via unique.unique)
         to 'AltDatabase/<species>/<array_type>/<species>_junction_comps.txt'.

    Nothing is returned; progress is reported with print statements.
    """
    ### NOTE: `type` shadows the builtin of the same name within this function
    type = 'standard'
    gene_junction_db = importEnsemblUCSCAltJunctions(species,type)
    
    ### Retrieve probesets with exon-junctions associated - these are critical exons
    filename = 'AltDatabase/'+species+'/'+array_type+'/'+species+'_Ensembl_'+array_type+'_probesets.txt'
    gene_probeset_db = ExonArrayEnsemblRules.reimportEnsemblProbesetsForSeqExtraction(filename,'junctions',{})
    ### left/right: junction ID (probeset minus its last 2 characters) -> region IDs of the '|5' / '|3' half
    ### gene_db: junction ID -> gene; gene_exon_db: (gene, region ID) -> non-junction probesets
    ### nonjunction_aligning: junction ID -> (region ID + '_' + coordinate, 'left' or 'right')
    left={}; right={}; gene_db={}; gene_exon_db={}; nonjunction_aligning={}
    for gene in gene_probeset_db:
        for (probe_data,ed) in gene_probeset_db[gene]:
            probeset, strand, probeset_start, probeset_stop = probe_data
            region_id = string.replace(ed.RegionNumber(),'-','.')
            original_region_id = region_id
            ### Several region IDs / start / stop values may be packed into one '|'-delimited string
            region_ids = string.split(region_id,'|')
            gene_db[probeset[:-2]]=gene
            #ed.AssociatedSplicingJunctions()
            r_starts=string.split(ed.ExonStart(),'|'); r_stops=string.split(ed.ExonStop(),'|')
            for region_id in region_ids:
                if '|5' in probeset:
                    try: left[probeset[:-2]].append(region_id)
                    except Exception: left[probeset[:-2]]=[region_id]
                    if strand == '+': ### If the junction probesets DO NOT align to the region coordinates, then the probeset maps to a junction outside the database
                        if probeset_stop not in r_stops: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_stop,'left'
                    elif probeset_start not in r_starts: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_start,'left'
                elif '|3' in probeset:
                    try: right[probeset[:-2]].append(region_id)
                    except Exception: right[probeset[:-2]]=[region_id]
                    ### Mirror image of the '|5' case: the start coordinate is checked on '+', the stop otherwise
                    if strand == '+':
                        if probeset_start not in r_starts: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_start,'right'
                    elif probeset_stop not in r_stops: nonjunction_aligning[probeset[:-2]] = original_region_id+'_'+probeset_stop,'right'
                else:
                    ### NOTE(review): `killer` is not defined in this block - presumably a deliberate
                    ### NameError to abort when a non-junction probeset has a '_' in its region ID; confirm
                    if '_' in region_id: print killer
                    try: gene_exon_db[gene,region_id].append(probeset)
                    except Exception: gene_exon_db[gene,region_id] = [probeset]

    print 'len(nonjunction_aligning)',len(nonjunction_aligning)
    gene_exon_db = eliminateRedundant(gene_exon_db)            
    junction_db={} ### Get the exon-region IDs for an exon-junction
    ### Only junction IDs that have both a left and a right half are paired
    for probeset in left:
        gene = gene_db[probeset]
        if probeset in right:
            for region1 in left[probeset]:
                for region2 in right[probeset]:
                    junction = region1+'-'+region2
                    try: junction_db[gene,junction].append(probeset)
                    except Exception: junction_db[gene,junction] = [probeset]

    probeset_junction_export = 'AltDatabase/' + species + '/'+array_type+'/'+ species + '_junction_comps.txt'
    
    ### NOTE: the file handle is closed only at the end of the function (not on an exception)
    fn=filepath(probeset_junction_export); data = open(fn,'w')
    print "Exporting",probeset_junction_export
    title = 'gene'+'\t'+'critical_exon'+'\t'+'exclusion_junction_region'+'\t'+'inclusion_junction_region'+'\t'+'exclusion_probeset'+'\t'+'inclusion_probeset'+'\t'+'data_source'+'\n'
    data.write(title); temp_list=[]
    
    ### Rows are written only for comparisons where BOTH the inclusion and the exclusion junction have probesets
    for (gene,critical_exon,incl_junction,excl_junction) in gene_junction_db:
        if (gene,incl_junction) in junction_db:
            incl_junction_probesets = junction_db[gene,incl_junction]
            if (gene,excl_junction) in junction_db:
                excl_junction_probesets = junction_db[gene,excl_junction]
                for incl_junction_probeset in incl_junction_probesets:
                    for excl_junction_probeset in excl_junction_probesets:
                        ### Exon-vs-exclusion-junction rows: the critical exon's own probesets act as the inclusion
                        ### probeset. A missing (gene,critical_exon) key raises and is swallowed by the except below.
                        try:
                            for incl_exon_probeset in gene_exon_db[gene,critical_exon]:
                                if incl_junction_probeset in nonjunction_aligning or excl_junction_probeset in nonjunction_aligning: null=[]
                                else: ### Ensure the probeset DOES map to the annotated junctions
                                    temp_list.append(string.join([gene,critical_exon,excl_junction,critical_exon,excl_junction_probeset,incl_exon_probeset,'AltAnalyze'],'\t')+'\n')
                        except Exception: null=[]
                        ### Junctions whose probeset does not align to the annotated coordinates are renamed.
                        ### NOTE(review): incl_junction/excl_junction are rebound here, so the renamed value
                        ### carries over into the remaining iterations of these two inner loops - confirm intended.
                        if incl_junction_probeset in nonjunction_aligning:
                            new_region_id, side = nonjunction_aligning[incl_junction_probeset]
                            incl_junction = renameJunction(incl_junction,side,new_region_id)
                        if excl_junction_probeset in nonjunction_aligning:
                            new_region_id, side = nonjunction_aligning[excl_junction_probeset]
                            excl_junction = renameJunction(excl_junction,side,new_region_id)
                        ### Junction-vs-junction row, skipped when both sides are the same probeset
                        if excl_junction_probeset!=incl_junction_probeset:
                            temp_list.append(string.join([gene,critical_exon,excl_junction,incl_junction,excl_junction_probeset,incl_junction_probeset,'AltAnalyze'],'\t')+'\n')
    temp_list = unique.unique(temp_list)
    for i in temp_list: data.write(i)
    data.close()
    print 'Number of compared junctions exported', len(temp_list)
Пример #8
0
def getAnnotations(fl,Array_type,p_threshold,e_threshold,data_source,manufacturer,constitutive_source,Species,avg_all_for_ss,filter_by_DABG,perform_alt_analysis,expression_data_format):
    """Import probeset annotations, then export and filter the expression data for a dataset.

    fl -- options object; this function calls ExpFile(), StatsFile() and RootDir() on it and,
        inside try/except, FeatureNormalization(), excludeLowExpressionExons() and
        useJunctionsForGeneExpression().
    Array_type -- platform name ('AltMouse', 'gene', 'junction', 'RNASeq', ...); stored in the global `array_type`.
    p_threshold -- DABG p-value cutoff, converted to float and stored in `dabg_p_threshold`.
    e_threshold -- expression cutoff; stored log2-transformed when expression_data_format is 'log'.
    data_source -- not referenced in this function.
    manufacturer -- 'Affymetrix' selects the Ensembl-linked annotation import.
    constitutive_source -- passed through to ExonArrayEnsemblRules.getAnnotations.
    Species -- species code; stored in the global `species`.
    avg_all_for_ss -- stored in the global `avg_all_probes_for_steady_state`.
    filter_by_DABG -- 'yes' additionally exports the DABG (stats) file when that file is found.
    perform_alt_analysis -- passed through to the export and filter helpers.
    expression_data_format -- 'log' or another value (treated as non-log); stored in `exp_data_format`.

    Returns (probeset_gene_db, annotate_db, comparison_filename_list).

    Side effects: assigns a large number of module-level globals (see the `global`
    statements below) and writes the grouped/filtered export files through the helpers it calls.
    """
    global species; species = Species; global average_all_probesets; average_all_probesets={}
    global avg_all_probes_for_steady_state; avg_all_probes_for_steady_state = avg_all_for_ss; global filter_by_dabg; filter_by_dabg = filter_by_DABG
    global dabg_p_threshold; dabg_p_threshold = float(p_threshold); global root_dir; global biotypes; global normalize_feature_exp
    global expression_threshold; global exp_data_format; exp_data_format = expression_data_format; global UserOptions; UserOptions = fl
    global full_dataset_export_dir; global excludeLowExpressionExons

    """
    try: exon_exp_threshold = fl.ExonExpThreshold()
    except Exception: exon_exp_threshold = 0
    try: exon_rpkm_threshold = fl.ExonRPKMThreshold()
    except Exception: exon_rpkm_threshold = 0
    try: gene_rpkm_threshold = fl.RPKMThreshold()
    except Exception: gene_rpkm_threshold = 0
    try: gene_exp_threshold = fl.GeneExpThreshold()
    except Exception: gene_exp_threshold = 0
    """
    
    ### The input expression data can be log or non-log. If non-log, transform to log in FilterDABG prior to the alternative exon analysis - v.1.16
    if expression_data_format == 'log':
        ### Any failure of the conversion (non-numeric or non-positive threshold) falls back to 0
        try: expression_threshold = math.log(float(e_threshold),2)
        except Exception: expression_threshold = 0 ### Applies to RNASeq datasets
    else:
        expression_threshold = float(e_threshold)
    
    process_from_scratch = 'no' ###internal variables used while testing
    global dabg_summary; global expression_summary; dabg_summary={};expression_summary={}
    global fulldataset_export_object; global array_type; array_type = Array_type
    global exp_analysis_type; exp_analysis_type = 'expression'
    global stats_input_dir
    expr_input_dir = fl.ExpFile(); stats_input_dir = fl.StatsFile(); root_dir = fl.RootDir()
    ### Optional settings: each falls back to a default when the accessor call fails
    try: normalize_feature_exp = fl.FeatureNormalization()
    except Exception: normalize_feature_exp = 'NA'
    try: excludeLowExpressionExons = fl.excludeLowExpressionExons()
    except Exception: excludeLowExpressionExons = True
    try:
        useJunctionsForGeneExpression = fl.useJunctionsForGeneExpression()
        if useJunctionsForGeneExpression:
            print 'Using known junction only to estimate gene expression!!!'
    except Exception: useJunctionsForGeneExpression = False
    
    source_biotype = 'mRNA'
    if array_type == 'gene': source_biotype = 'gene'
    elif array_type == 'junction': source_biotype = 'junction'
    ###Get annotations using Affymetrix as a trusted source or via links to Ensembl

    ### NOTE(review): there is no else branch - for any other manufacturer/platform combination
    ### probeset_db, annotate_db and constitutive_gene_db stay undefined and the first use below fails.
    ### The AltMouse branch does not define splicing_analysis_db (only referenced inside a try/except later).
    if array_type == 'AltMouse':
        probeset_db,constitutive_gene_db = ExpressionBuilder.importAltMerge('full'); annotate_db={}
        source_biotype = 'AltMouse'
    elif manufacturer == 'Affymetrix' or array_type == 'RNASeq':
        if array_type == 'RNASeq':
            ### For RNASeq a (platform, root directory) tuple is passed in place of the biotype string
            source_biotype = array_type, root_dir

        probeset_db,annotate_db,constitutive_gene_db,splicing_analysis_db = ExonArrayEnsemblRules.getAnnotations(process_from_scratch,constitutive_source,source_biotype,species)

    ### Get all file locations and get array headers
    #print len(splicing_analysis_db),"genes included in the splicing annotation database (constitutive only containing)"
    stats_file_status = verifyFile(stats_input_dir)
    array_linker_db,array_names = importExonProbesetData(expr_input_dir,{},'arraynames')
    input_dir_split = string.split(expr_input_dir,'/')
    ### Export path: <root>AltExpression/FullDatasets/<platform folder>/<species>/<input file name without 'exp.'>
    full_dataset_export_dir = root_dir+'AltExpression/FullDatasets/ExonArray/'+species+'/'+string.replace(input_dir_split[-1],'exp.','')
    if array_type == 'gene': full_dataset_export_dir = string.replace(full_dataset_export_dir,'ExonArray','GeneArray')
    if array_type == 'junction': full_dataset_export_dir = string.replace(full_dataset_export_dir,'ExonArray','JunctionArray')
    if array_type == 'AltMouse': full_dataset_export_dir = string.replace(full_dataset_export_dir,'ExonArray','AltMouse')
    if array_type == 'RNASeq': full_dataset_export_dir = string.replace(full_dataset_export_dir,'ExonArray','RNASeq')
    try: fulldataset_export_object = export.ExportFile(full_dataset_export_dir)
    except Exception:
        print 'AltAnalyze is having trouble creating the directory:\n',full_dataset_export_dir
        ### NOTE(review): `force_exception` is not defined in this block - presumably a deliberate NameError to abort; confirm
        print 'Report this issue to the AltAnalyze help desk or create this directory manually (Error Code X1).'; force_exception
    ### Organize arrays according to groups and export all probeset data and any pairwise comparisons
    data_type = 'expression'
    if array_type == 'RNASeq':
        expr_input_dir = string.replace(expr_input_dir,'exp.','counts.') ### Filter based on the counts file and then replace values with the normalized as the last step
    comparison_filename_list,biotypes = exportGroupedComparisonProbesetData(expr_input_dir,probeset_db,data_type,array_names,array_linker_db,perform_alt_analysis)
    ### When only junctions should drive gene expression, drop the 'exon' biotype if 'junction' is present
    if useJunctionsForGeneExpression:
        if 'junction' in biotypes:
            if 'exon' in biotypes: del biotypes['exon']
    if filter_by_dabg == 'yes' and stats_file_status == 'found':
        data_type = 'dabg'
        exportGroupedComparisonProbesetData(stats_input_dir,probeset_db,data_type,array_names,array_linker_db,perform_alt_analysis)
    ###Filter expression data based on DABG and annotation filtered probesets (will work without DABG filtering as well) - won't work for RNA-Seq (execute function later)
    filtered_exon_db = removeNonExpressedProbesets(probeset_db,full_dataset_export_dir)
    filterExpressionData(expr_input_dir,filtered_exon_db,constitutive_gene_db,probeset_db,'expression',array_names,perform_alt_analysis)
    constitutive_gene_db={}; probeset_gene_db = makeGeneLevelAnnotations(probeset_db)

    if array_type == 'RNASeq':
        ### The export object is re-created before each of the two passes (counts file, then exp. file)
        fulldataset_export_object = export.ExportFile(full_dataset_export_dir)
        data_type = 'expression' ### Repeat with counts and then with exp. to add gene-level estimates to both
        exportGroupedComparisonProbesetData(expr_input_dir,probeset_db,data_type,array_names,array_linker_db,perform_alt_analysis)
        fulldataset_export_object = export.ExportFile(full_dataset_export_dir)
        expr_input_dir = string.replace(expr_input_dir,'counts.','exp.')
        exportGroupedComparisonProbesetData(expr_input_dir,probeset_db,data_type,array_names,array_linker_db,perform_alt_analysis)
        
    ### Best-effort memory cleanup; any failure (e.g. splicing_analysis_db never defined) is ignored
    try: clearObjectsFromMemory(average_all_probesets); clearObjectsFromMemory(expression_summary); clearObjectsFromMemory(splicing_analysis_db)
    except Exception: null=[]
    filtered_exon_db=[]; probeset_db={}; average_all_probesets={}; expression_summary={}; splicing_analysis_db={}
    #filtered_exp_db,group_count,ranked_array_headers = filterExpressionData(expr_input_dir,filtered_exon_db,constitutive_gene_db,probeset_db)
    #filtered_gene_db = permformFtests(filtered_exp_db,group_count,probeset_db)

    """    
    pre_filtered_db=[]
    print 'global vars'
    returnLargeGlobalVars()    
    print 'local vars'
    all = [var for var in locals() if (var[:2], var[-2:]) != ("__", "__")]
    for var in all:
            try:
                if len(locals()[var])>500: print var, len(locals()[var])
            except Exception: null=[]
    """
    return probeset_gene_db, annotate_db, comparison_filename_list
Пример #9
0
    
    grabExonIntronPromoterSequences(Species,Array_type,Data_type,Output_types)
    sys.exit()
    #"""
    avg_all_for_ss = 'yes'
    import_dir = '/AltDatabase/'+Species+ '/exon'
    expr_file_dir = 'ExpressionInput\exp.HEK-confluency.plier.txt'
    dagb_p = 0.001
    f_cutoff = 2.297
    exons_to_grab = "core"
    x = 'Affymetrix'
    y = 'Ensembl'
    z = 'default'
    data_source = y
    constitutive_source = z
    filename = expr_file_dir; p = dagb_p
    getAnnotations(expr_file_dir,dagb_p,exons_to_grab,data_source,constitutive_source,Species)
    global species; species = Species
    process_from_scratch = 'no'
    ###Get annotations using Affymetrix as a trusted source or via links to Ensembl
    if data_source == 'Affymetrix':
        annotation_dbases = ExonArrayAffyRules.getAnnotations(exons_to_grab,constitutive_source,process_from_scratch)
        probe_association_db,constitutive_gene_db,exon_location_db, trans_annotation_db, trans_annot_extended = annotation_dbases
    else:
        probeset_db,annotate_db,constitutive_gene_db,splicing_analysis_db = ExonArrayEnsemblRules.getAnnotations(process_from_scratch,constitutive_source,species,avg_all_for_ss)

    filterExpressionData(filename,filtered_exon_db,constitutive_gene_db,probeset_db,data_type)
    #filtered_gene_db = permformFtests(filtered_exp_db,group_count,probeset_db)