def Indexing(counts, PSIFileDir, output):
    """Export GFF entries for every PSI junction pair and index them for SashimiPlot.

    For each junction pair whose gene is present in the imported feature
    database, exportToGff() is called; when it succeeds, a gene entry is
    written with writegene() from the boundary positions it returned, e.g.
    ('chrX', '38600355', '38606652', '+', 'ENSMUSG00000000355__E1.2-E5.1').
    A whole-gene entry is then exported for every feature whose name contains
    'E1.1-E100.1'. Finally the module-level GFF export handle is closed and
    ifg.index_gff() is run.

    counts -- passed to reimportFeatures() to import the exon and gene coordinates
    PSIFileDir -- passed to importPSIJunctions() and importReciprocalJunctions()
    output -- path handed to ifg.index_gff(); the index goes to the
              'sashimi_index/' folder under findParentDir(output)

    If ifg.index_gff() raises, the error and traceback are printed and the
    interpreter exits through sys.exit().
    """
    gene_feature_db = reimportFeatures(counts)  ### import the exon and gene coordinates
    PSIJunctions = importPSIJunctions(PSIFileDir)  ### Retrieve all PSI junction pairs
    PSIJunctions = importReciprocalJunctions(PSIFileDir, PSIJunctions)  ### Include other junctions from ASPIRE/LinRegress

    for (junction1, junction2) in PSIJunctions:
        ### Reset per event so a failed export can never re-write the gene entry of an earlier event
        exported = False
        geneID = junction1.split('__')[0]
        if geneID in gene_feature_db:
            try:
                chrom, junction_start, junction_end, strand, selected_event = exportToGff(
                    junction1, junction2, gene_feature_db[geneID], geneID)
                exported = True
            except Exception:
                pass  ### Deliberate best-effort: not handling trans-splicing well
        if exported:
            ### For each exon entry exported for this event, we need a gene entry with
            ### the boundary exon positions exported (returned from the above function)
            try:
                writegene(chrom, junction_start, junction_end, strand, selected_event)
            except Exception:
                pass  ### Deliberate best-effort: skip gene entries that cannot be written

    ### Export coordinate information for the entire gene
    for geneID in gene_feature_db:
        feature_db = gene_feature_db[geneID]
        for event in feature_db:
            if 'E1.1-E100.1' not in event:
                continue
            try:
                chrom, junction_start, junction_end, strand, selected_event = exportToGff(
                    'null', event, feature_db, geneID)
                writegene(chrom, junction_start, junction_end, strand, selected_event)
            except Exception:
                pass  ### Deliberate best-effort: skip genes that cannot be exported

    time.sleep(2)  ### NOTE(review): presumably lets pending writes settle before closing -- confirm
    gff_export_obj.close()
    newout = findParentDir(output) + 'sashimi_index/'
    try:
        ifg.index_gff(output, newout)
    except Exception:
        ### Report before exiting; a message placed after sys.exit() is never printed
        print('error in indexing')
        print(traceback.format_exc())
        sys.exit()
def Indexing(counts, inputpsi, output):
    """Write exon and gene entries for each PSI junction event, then index them.

    Every event name is split on '__'; the first field is treated as the
    primary gene ID. write_index() is called with the features of the primary
    gene and of any other gene ID found inside the event name (fields
    containing 'ENS' or '00000'). The five values returned by the last
    successful write_index() call are passed to writegene(), e.g.
    ('chrX', '38600355', '38606652', '+', 'ENSMUSG00000000355__E1.2-E5.1').
    Afterwards every feature whose name contains '100.1' is written as an
    exon plus gene entry, the module-level export handle is closed and
    ifg.index_gff() is run.

    counts -- passed to reimportFeatures()
    inputpsi -- passed to importPSIJunctions() and importReciprocalJunctions()
    output -- path handed to ifg.index_gff(); the index goes to the
              'sashimi_index/' folder under findParentDir(output)

    If ifg.index_gff() raises, the error and traceback are printed and the
    interpreter exits through sys.exit().
    """
    gene_feature_db = reimportFeatures(counts)
    PSIJunctions = importPSIJunctions(inputpsi)
    PSIJunctions = importReciprocalJunctions(inputpsi, PSIJunctions)

    for junction_event in PSIJunctions:
        event_split = junction_event.split('__')
        gene = event_split[0]
        ### Tracked per event so a gene entry is only written from values produced for this event
        indexed = False

        if gene in gene_feature_db:
            try:
                chrom, start_l, end_l, strand, label = write_index(
                    junction_event, gene_feature_db[gene])
                indexed = True
            except Exception:
                pass  ### Deliberate best-effort: not handling trans-splicing well

        ### Look for additional gene IDs embedded in the event name
        for field in event_split:
            if 'ENS' not in field and '00000' not in field:
                continue
            if '-' in field:
                geneID = field.split('-')[1]
            else:
                geneID = field
            if geneID == gene or geneID not in gene_feature_db:
                continue
            try:
                chrom, start_l, end_l, strand, label = write_index(
                    junction_event, gene_feature_db[geneID])
                indexed = True
            except Exception:
                pass  ### Deliberate best-effort: not handling trans-splicing well

        if indexed:
            try:
                writegene(chrom, start_l, end_l, strand, label)
            except Exception:
                pass  ### Deliberate best-effort: skip gene entries that cannot be written

    ### Export coordinate information for the entire gene
    for gene in gene_feature_db:
        for feature in gene_feature_db[gene]:
            if '100.1' not in feature:
                continue
            ### Feature strings are parsed as '<event>=<chr>__<start>-<stop>'
            event_name, position_data = feature.split('=')
            chrom, pos = position_data.split('__')
            start, stop = pos.split('-')
            if float(stop) > float(start):
                strand = '+'
            else:
                strand = '-'
            geneID = event_name.split('__')[0]  ### just use the gene ID
            manualWriteExon(chrom, start, stop, strand, geneID)
            writegene(chrom, start, stop, strand, geneID)

    time.sleep(2)  ### NOTE(review): presumably lets pending writes settle before closing -- confirm
    export_in.close()
    newout = findParentDir(output) + 'sashimi_index/'
    try:
        ifg.index_gff(output, newout)
    except Exception:
        ### Report before exiting; a message placed after sys.exit() is never printed
        print('error in indexing')
        print(traceback.format_exc())
        sys.exit()
def Indexing(counts, inputpsi, output):
    """Write index and gene entries for each event label, then index the output.

    The first entry returned by genelist() is skipped. Every other label is
    split on ':'; the first field is treated as the primary gene ID.
    write_index() is called with the indexdic() data of the primary gene and
    of any other gene ID found in the label (fields containing 'ENS'). The
    five values returned by the last write_index() call are passed to
    writegene(). Any exception while handling a label skips that label.
    Finally the module-level export handle is closed and ifg.index_gff() is
    run.

    counts -- passed to indexdic()
    inputpsi -- passed to genelist()
    output -- path handed to ifg.index_gff(); the index goes to the
              'trial_index/' folder under findParentDir(output)
    """
    index_read = indexdic(counts)
    gene_label = genelist(inputpsi)

    for position, label in enumerate(gene_label):
        if position == 0:
            continue  ### first entry is skipped (presumably a header -- confirm)
        try:
            fields = label.split(':')
            primary = fields[0]
            ### Tracked per label so writegene never reuses values left over from an earlier label
            indexed = False

            if primary in index_read:
                chrom, start_l, end_l, strand, name = write_index(label, index_read[primary])
                indexed = True

            ### Look for additional gene IDs embedded in the label
            for field in fields:
                if 'ENS' not in field:
                    continue
                if '-' in field:
                    partner = field.split('-')[1]
                else:
                    partner = field
                if partner != primary and partner in index_read:
                    chrom, start_l, end_l, strand, name = write_index(label, index_read[partner])
                    indexed = True

            if indexed:
                writegene(chrom, start_l, end_l, strand, name)
        except Exception:
            continue  ### Deliberate best-effort: skip labels that cannot be indexed

    time.sleep(2)  ### NOTE(review): presumably lets pending writes settle before closing -- confirm
    export_in.close()
    newout = findParentDir(output) + 'trial_index/'
    try:
        ifg.index_gff(output, newout)
    except Exception:
        print('error in indexing')
def Indexing(counts, inputpsi, output):
    """Write index and gene entries for each event label, then index the output.

    The first entry returned by genelist() is skipped. Every other label is
    split on ':'; the first field is treated as the primary gene ID.
    write_index() is called with the indexdic() data of the primary gene and
    of any other gene ID found in the label (fields containing 'ENS'). The
    five values returned by the last write_index() call are passed to
    writegene(). Any exception while handling a label skips that label.
    Finally the module-level export handle is closed and ifg.index_gff() is
    run.

    counts -- passed to indexdic()
    inputpsi -- passed to genelist()
    output -- path handed to ifg.index_gff(); the index goes to the
              'trial_index/' folder under findParentDir(output)
    """
    index_read = indexdic(counts)
    gene_label = genelist(inputpsi)

    for position, label in enumerate(gene_label):
        if position == 0:
            continue  ### first entry is skipped (presumably a header -- confirm)
        try:
            fields = label.split(':')
            primary = fields[0]
            ### Tracked per label so writegene never reuses values left over from an earlier label
            indexed = False

            if primary in index_read:
                chrom, start_l, end_l, strand, name = write_index(
                    label, index_read[primary])
                indexed = True

            ### Look for additional gene IDs embedded in the label
            for field in fields:
                if 'ENS' not in field:
                    continue
                if '-' in field:
                    partner = field.split('-')[1]
                else:
                    partner = field
                if partner != primary and partner in index_read:
                    chrom, start_l, end_l, strand, name = write_index(
                        label, index_read[partner])
                    indexed = True

            if indexed:
                writegene(chrom, start_l, end_l, strand, name)
        except Exception:
            continue  ### Deliberate best-effort: skip labels that cannot be indexed

    time.sleep(2)  ### NOTE(review): presumably lets pending writes settle before closing -- confirm
    export_in.close()
    newout = findParentDir(output) + 'trial_index/'
    try:
        ifg.index_gff(output, newout)
    except Exception:
        print('error in indexing')
def Indexing(counts, PSIFileDir, output):
    """Export GFF entries for every PSI junction pair and index them for SashimiPlot.

    For each junction pair whose gene is present in the imported feature
    database, exportToGff() is called; when it succeeds, a gene entry is
    written with writegene() from the boundary positions it returned, e.g.
    ('chrX', '38600355', '38606652', '+', 'ENSMUSG00000000355__E1.2-E5.1').
    A whole-gene entry is then exported for every feature whose name contains
    'E1.1-E100.1'. Finally the module-level GFF export handle is closed and
    ifg.index_gff() is run.

    counts -- passed to reimportFeatures() to import the exon and gene coordinates
    PSIFileDir -- passed to importPSIJunctions() and importReciprocalJunctions()
    output -- path handed to ifg.index_gff(); the index goes to the
              'sashimi_index/' folder under findParentDir(output)

    If ifg.index_gff() raises, the error and traceback are printed and the
    interpreter exits through sys.exit().
    """
    gene_feature_db = reimportFeatures(counts)  ### import the exon and gene coordinates
    PSIJunctions = importPSIJunctions(PSIFileDir)  ### Retrieve all PSI junction pairs
    PSIJunctions = importReciprocalJunctions(PSIFileDir, PSIJunctions)  ### Include other junctions from ASPIRE/LinRegress

    for (junction1, junction2) in PSIJunctions:
        ### Reset per event so a failed export can never re-write the gene entry of an earlier event
        exported = False
        geneID = junction1.split('__')[0]
        if geneID in gene_feature_db:
            try:
                chrom, junction_start, junction_end, strand, selected_event = exportToGff(
                    junction1, junction2, gene_feature_db[geneID], geneID)
                exported = True
            except Exception:
                pass  ### Deliberate best-effort: not handling trans-splicing well
        if exported:
            ### For each exon entry exported for this event, we need a gene entry with
            ### the boundary exon positions exported (returned from the above function)
            try:
                writegene(chrom, junction_start, junction_end, strand, selected_event)
            except Exception:
                pass  ### Deliberate best-effort: skip gene entries that cannot be written

    ### Export coordinate information for the entire gene
    for geneID in gene_feature_db:
        feature_db = gene_feature_db[geneID]
        for event in feature_db:
            if 'E1.1-E100.1' not in event:
                continue
            try:
                chrom, junction_start, junction_end, strand, selected_event = exportToGff(
                    'null', event, feature_db, geneID)
                writegene(chrom, junction_start, junction_end, strand, selected_event)
            except Exception:
                pass  ### Deliberate best-effort: skip genes that cannot be exported

    time.sleep(2)  ### NOTE(review): presumably lets pending writes settle before closing -- confirm
    gff_export_obj.close()
    newout = findParentDir(output) + 'sashimi_index/'
    try:
        ifg.index_gff(output, newout)
    except Exception:
        ### Report before exiting; a message placed after sys.exit() is never printed
        print('error in indexing')
        print(traceback.format_exc())
        sys.exit()