Exemplo n.º 1
0
def buildUCSCAnnoationFiles(species,mRNA_Type,export_all_associations,run_from_scratch,force):
    """Check for the species' UCSC transcript-structure file and build it when needed.

    export_all_associations picks the file that is checked: 'no' selects
    <species>_UCSC_transcript_structure_mrna.txt, any other value selects the
    COMPLETE-mrna variant. If verifyFile(filename,'counts') reports fewer than 9,
    UCSCImport.runUCSCEnsemblAssociations is called with the arguments given here;
    if that call raises, UCSCImport.exportNullDatabases(species) is called instead.
    Returns None in every case.
    """
    ### Default suffix applies to EnsemblImport.py analyses
    suffix = '_UCSC_transcript_structure_mrna.txt'
    if export_all_associations != 'no':
        ### Applies to the file used for Domain-level analyses
        suffix = '_UCSC_transcript_structure_COMPLETE-mrna.txt'
    filename = 'AltDatabase/ucsc/'+species+'/'+species+suffix

    ### Nothing to do when the file is already reported with 9 or more
    if verifyFile(filename,'counts') >= 9:
        return

    import UCSCImport ### Only imported when a build is actually required
    try:
        UCSCImport.runUCSCEnsemblAssociations(species,mRNA_Type,export_all_associations,run_from_scratch,force)
    except Exception:
        ### Fallback used for species not supported by UCSC
        UCSCImport.exportNullDatabases(species)
Exemplo n.º 2
0
def buildUCSCAnnoationFiles(species, mRNA_Type, export_all_associations,
                            run_from_scratch, force):
    """Ensure the UCSC transcript-structure file exists for `species`.

    The file inspected is the plain '_mrna.txt' one when
    export_all_associations == 'no' and the 'COMPLETE-mrna.txt' one otherwise.
    When verifyFile(..., 'counts') gives a value below 9 the UCSC/Ensembl
    associations are rebuilt through UCSCImport; if the rebuild raises, null
    databases are exported for the species instead. Nothing is returned.
    """
    build_args = (species, mRNA_Type, export_all_associations,
                  run_from_scratch, force)

    ### 'no' -> EnsemblImport.py analyses; otherwise the Domain-level file
    structure_suffix = ('_UCSC_transcript_structure_mrna.txt'
                        if export_all_associations == 'no' else
                        '_UCSC_transcript_structure_COMPLETE-mrna.txt')
    structure_file = 'AltDatabase/ucsc/' + species + '/' + species + structure_suffix

    line_count = verifyFile(structure_file, 'counts')
    if line_count < 9:
        import UCSCImport
        try:
            UCSCImport.runUCSCEnsemblAssociations(*build_args)
        except Exception:
            ### used for species not supported by UCSC
            UCSCImport.exportNullDatabases(species)
Exemplo n.º 3
0
        for line in open(fn,'rU').xreadlines():
            counts+=1
            if counts>10: break
    except Exception:
        counts=0
    if species_name == 'counts': ### Used if the file cannot be downloaded from http://www.altanalyze.org
        return counts
    elif counts == 0:
        if species_name in filename: server_folder = species_name ### Folder equals species unless it is a universal file
        elif 'Mm' in filename: server_folder = 'Mm' ### For PicTar
        else: server_folder = 'all'
        print 'Downloading:',server_folder,filename
        update.downloadCurrentVersion(filename,server_folder,'txt')
    else:
        return counts
    
if __name__ == '__main__':
    ### Ad hoc driver: only the polyaDb.txt check runs; the first sys.exit() ends the script.
    species = 'Hs'
    #species_full = 'Drosophila_melanogaster'
    filename = 'AltDatabase/ucsc/'+species+'/polyaDb.txt'
    ### Makes sure file is local and if not downloads.
    verifyFile(filename,species)
    sys.exit()

    ### NOTE: everything below is unreachable because of the sys.exit() above.
    ### It is kept as a record of earlier manual test runs.
    importEnsExonStructureData(species,[],[],[])
    sys.exit()
    reformatPolyAdenylationCoordinates(species,'no')
    sys.exit()
    #test = 'yes'
    #test_gene = ['ENSG00000140153','ENSG00000075413']
    import UCSCImport
    import update
    ### species_full is not assigned in this block (only in the commented-out line above)
    ucsc_db_path = '/goldenPath/currentGenomes/'+species_full+'/database'
    knownAlt_dir = update.getFTPData('hgdownload.cse.ucsc.edu',ucsc_db_path,'knownAlt.txt.gz')
    polyA_dir = update.getFTPData('hgdownload.cse.ucsc.edu',ucsc_db_path,'polyaDb.txt.gz')
    output_dir = 'AltDatabase/ucsc/'+species + '/'
    for ftp_dir in (knownAlt_dir,polyA_dir):
        UCSCImport.downloadFiles(ftp_dir,output_dir)
    sys.exit()
    ### The three annotation arguments are not assigned anywhere in this block
    ensembl_ucsc_splicing_annotations = importEnsExonStructureData(species,ensembl_gene_coordinates,ensembl_annotations,exon_annotation_db)
Exemplo n.º 4
0
if __name__ == '__main__':
    ### Manual test entry point. Only the first step (local check / download
    ### of polyaDb.txt through verifyFile) executes before sys.exit().
    species = 'Hs'
    #species_full = 'Drosophila_melanogaster'
    filename = 'AltDatabase/ucsc/' + species + '/polyaDb.txt'
    ### Makes sure file is local and if not downloads.
    verifyFile(filename, species)
    sys.exit()

    ### ---- Unreachable from here on (follows the sys.exit() above) ----
    importEnsExonStructureData(species, [], [], [])
    sys.exit()

    reformatPolyAdenylationCoordinates(species, 'no')
    sys.exit()

    #test = 'yes'
    #test_gene = ['ENSG00000140153','ENSG00000075413']
    import UCSCImport
    import update

    ### species_full is only set in the commented-out line near the top of this block
    ucsc_host = 'hgdownload.cse.ucsc.edu'
    database_dir = '/goldenPath/currentGenomes/' + species_full + '/database'
    knownAlt_dir = update.getFTPData(ucsc_host, database_dir, 'knownAlt.txt.gz')
    polyA_dir = update.getFTPData(ucsc_host, database_dir, 'polyaDb.txt.gz')

    output_dir = 'AltDatabase/ucsc/' + species + '/'
    UCSCImport.downloadFiles(knownAlt_dir, output_dir)
    UCSCImport.downloadFiles(polyA_dir, output_dir)
    sys.exit()

    ### ensembl_gene_coordinates, ensembl_annotations and exon_annotation_db
    ### are not assigned within this block
    ensembl_ucsc_splicing_annotations = importEnsExonStructureData(
        species, ensembl_gene_coordinates, ensembl_annotations,
        exon_annotation_db)