def _iterTabDelimitedRows(path):
    """Yield the tab-separated fields of every row after the header line.

    The first line of the file is always discarded. The file is opened in a
    ``with`` block so the handle is closed once iteration finishes (or the
    generator is discarded), rather than being left to the garbage collector.
    """
    with open(path, 'r') as handle:
        next(handle, None)  # skip the header row; harmless on an empty file
        for line in handle:
            # rstrip (rather than a two-way split on '\n') also accepts a
            # final line that has no trailing newline.
            yield line.rstrip('\n').split('\t')


def importSplicingAnnotationDatabaseAndSequence(species, array_type, biotype):
    """Load junction-comparison annotations combined with probeset sequences.

    Reads files under ``AltDatabase/<species>/<array_type>/``:

    * ``<array_type>-Ensembl_relationships.txt`` (only when ``array_type`` is
      ``'AltMouse'``): two columns, array gene and Ensembl gene.
    * ``<array_type>_critical-junction-seq.txt``: three columns, probeset,
      probeset sequence and junction sequence ('|' characters are removed
      from the junction sequence).
    * ``<array_type>_junction-comparisons.txt``: four columns, array gene,
      two probeset IDs and critical exons.

    Args:
        species: species code used to build the file paths.
        array_type: platform name used to build the file paths. For
            ``'junction'`` and ``'RNASeq'`` each gene ID in the comparisons
            file is mapped to itself instead of going through the
            relationships file.
        biotype: when ``'gene'``, the result maps each Ensembl gene ID to a
            list of ``ExonSeqModule.JunctionDataSimple`` objects. For any
            other value the result maps each probeset ID to a single
            ``JunctionDataSimple`` that receives the full list of gene IDs.

    Returns:
        dict keyed as described under ``biotype``.

    Raises:
        ValueError: if a data row does not have the expected column count.
    """
    base_path = 'AltDatabase/' + species + '/' + array_type + '/' + array_type

    array_ens_db = {}
    if array_type == 'AltMouse':
        filename = base_path + '-Ensembl_relationships.txt'
        update.verifyFile(filename, array_type)  ### Will force download if missing
        for array_gene, ens_gene in _iterTabDelimitedRows(filepath(filename)):
            array_ens_db.setdefault(array_gene, []).append(ens_gene)

    filename = base_path + '_critical-junction-seq.txt'
    probeset_seq_db = {}
    for probeset, probeset_seq, junction_seq in _iterTabDelimitedRows(filepath(filename)):
        probeset_seq_db[probeset] = probeset_seq, junction_seq.replace('|', '')

    ### Import reciprocal junctions, so we can compare these directly instead of
    ### hits to nulls and combine with sequence data. This short-cuts what is
    ### done in two functions in ExonSeqModule with exon level data.
    filename = base_path + '_junction-comparisons.txt'
    maps_gene_to_itself = array_type == 'junction' or array_type == 'RNASeq'
    probeset_gene_seq_db = {}
    for array_gene, probeset1, probeset2, critical_exons in _iterTabDelimitedRows(filepath(filename)):
        probesets = [probeset1, probeset2]
        if maps_gene_to_itself:
            array_ens_db[array_gene] = [array_gene]
        if array_gene not in array_ens_db:
            continue
        ensembl_gene_ids = array_ens_db[array_gene]
        for probeset_id in probesets:
            if probeset_id not in probeset_seq_db:
                continue
            probeset_seq, junction_seq = probeset_seq_db[probeset_id]
            if biotype == 'gene':
                # One annotation object per (probeset, Ensembl gene) pair,
                # grouped by gene.
                for ensembl_gene_id in ensembl_gene_ids:
                    probe_data = ExonSeqModule.JunctionDataSimple(
                        probeset_id, ensembl_gene_id, array_gene, probesets, critical_exons)
                    probe_data.SetExonSeq(probeset_seq)
                    probe_data.SetJunctionSeq(junction_seq)
                    probeset_gene_seq_db.setdefault(ensembl_gene_id, []).append(probe_data)
            else:
                ### Used for probeset annotations downstream of sequence alignment
                ### in LinkEST, analogous to exon_db for exon analyses
                probe_data = ExonSeqModule.JunctionDataSimple(
                    probeset_id, ensembl_gene_ids, array_gene, probesets, critical_exons)
                probe_data.SetExonSeq(probeset_seq)
                probe_data.SetJunctionSeq(junction_seq)
                probeset_gene_seq_db[probeset_id] = probe_data

    # A single-argument print emits the same text as the original two-item
    # print statement and parses under both Python 2 and Python 3.
    print(str(len(probeset_gene_seq_db)) + " " + "genes with probeset sequence associated")
    return probeset_gene_seq_db