Exemplo n.º 1
0
def exportChromosomeStrandCoordinates(species):
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(
        species, 'RNASeq', 'key_by_array')

    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(
        species)
    export_path = 'GenMAPPDBs/' + species + '/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)

    import ExonAnalyze_module
    gene_annotation_file = "AltDatabase/ensembl/" + species + "/" + species + "_Ensembl-annotations.txt"
    annotate_db = ExonAnalyze_module.import_annotations(
        gene_annotation_file, 'RNASeq')

    print 'Annotations for', len(gene_location_db), 'genes imported'

    sorted_list = []
    protein_coding = 0
    for gene in gene_location_db:
        chr, strand, start, end = gene_location_db[gene]
        if gene in gene_biotype_db:
            biotype = gene_biotype_db[gene][-1]
            if biotype == 'protein_coding': protein_coding += 1

        else: biotype = 'NA'
        if len(chr) < 7:
            sorted_list.append(
                [chr, strand, int(start),
                 int(end), gene, biotype])
        #else: print chr;sys.exit()
    print len(sorted_list), 'genes for typical chromosomes present'
    print protein_coding, 'protein coding genes present'
    sorted_list.sort()
    for values in sorted_list:
        chr, strand, start, end, gene, biotype = values
        try:
            symbol = annotate_db[gene].Symbol()
        except Exception:
            symbol = ''
        values = [gene, symbol, chr, strand, str(start), str(end), biotype]
        export_data.write(string.join(values, '\t') + '\n')
    export_data.close()
    print species, 'chromosome locations exported to:\n', export_path
Exemplo n.º 2
0
def exportChromosomeStrandCoordinates(species):
    import EnsemblImport
    gene_location_db = EnsemblImport.getEnsemblGeneLocations(species,'RNASeq','key_by_array')

    import ExpressionBuilder
    gene_biotype_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(species)
    export_path = 'GenMAPPDBs/'+species+'/chr_gene_locations.txt'
    export_data = export.ExportFile(export_path)

    import ExonAnalyze_module
    gene_annotation_file = "AltDatabase/ensembl/"+species+"/"+species+"_Ensembl-annotations.txt"
    annotate_db = ExonAnalyze_module.import_annotations(gene_annotation_file,'RNASeq')
      
    print 'Annotations for',len(gene_location_db),'genes imported'
    
    sorted_list=[]; protein_coding=0 
    for gene in gene_location_db:
        chr,strand,start,end = gene_location_db[gene]
        if gene in gene_biotype_db:
            biotype = gene_biotype_db[gene][-1]
            if biotype == 'protein_coding': protein_coding+=1
                
        else: biotype = 'NA'
        if len(chr)<7:
            sorted_list.append([chr,strand,int(start),int(end),gene,biotype])
        #else: print chr;sys.exit()
    print len(sorted_list),'genes for typical chromosomes present'
    print protein_coding, 'protein coding genes present'
    sorted_list.sort()        
    for values in sorted_list:
        chr,strand,start,end,gene,biotype=values
        try: symbol = annotate_db[gene].Symbol()
        except Exception: symbol = ''
        values = [gene,symbol,chr,strand,str(start),str(end),biotype]
        export_data.write(string.join(values,'\t')+'\n')
    export_data.close()
    print species, 'chromosome locations exported to:\n',export_path
Exemplo n.º 3
0
    eo.close()


if __name__ == '__main__':
    ################  Comand-line arguments ################
    import getopt
    CLIP_dir = None
    species = 'Hs'
    """ Usage:
    bedtools intersect -wb -a /Clip_merged_reproducible_ENCODE/K562/AARS-human.bed -b /annotations/combined/hg19_annotations-full.bed > /test.bed
    """

    if len(
            sys.argv[1:]
    ) <= 1:  ### Indicates that there are insufficient number of command-line arguments
        print 'WARNING!!!! Too commands supplied.'

    else:
        options, remainder = getopt.getopt(sys.argv[1:], '',
                                           ['species=', 'clip='])
        #print sys.argv[1:]
        for opt, arg in options:
            if opt == '--species':
                species = arg
            elif opt == '--clip':
                CLIP_dir = arg

    import ExpressionBuilder
    coding_db = ExpressionBuilder.importTranscriptBiotypeAnnotations(species)
    dataset_peaks = eCLIPimport(CLIP_dir)