gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict,genetxcigardict,cfg) if cfg.numtypes==1: genetxreaddict=getsimulatedoneexpression(genetxcountdict,cfg.readcount) for replicate_id in range(1,cfg.numdatasets+1): sample_id=1 gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict,genetxcigardict,cfg) if cfg.numtypes==2: #print cfg.coverageratiodistribution,cfg.jsddistribution genetwoexpressions=getsimulatedtwoexpressions(genetxcountdict,cfg.coverageratiodistribution,cfg.jsddistribution,cfg.readcount,cfg.fold_flg) genetxreaddict1=dict([(gene,[genetwoexpressions[gene][0],genetwoexpressions[gene][2]]) for gene in genetwoexpressions]) genetxreaddict2=dict([(gene,[genetwoexpressions[gene][1],genetwoexpressions[gene][3]]) for gene in genetwoexpressions]) jsdmetadatafile='%s/expression_jsd.txt'%(folderdict['metadata']) fout=open(jsdmetadatafile,'w') for gene in genetwoexpressions: fout.write('%s\t% 7d\t% 7d\t%s\t%s\t%6.4f\n'%(gene,genetwoexpressions[gene][0],genetwoexpressions[gene][1], common.fl2str(genetwoexpressions[gene][2]),common.fl2str(genetwoexpressions[gene][3]), genetwoexpressions[gene][4])) fout.close() #Todo Plot sample_id=1 for replicate_id in range(1,cfg.numdatasets+1): gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict1,genetxcigardict,cfg) sample_id=2 for replicate_id in range(1,cfg.numdatasets+1): gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict2,genetxcigardict,cfg) # if debug_flg==0: # deletesamfiles(folderdict) #print folderdict deletesamfiles(folderdict)
def divdictToflow(self, divdict):
    """Turn divergence-point descriptions into normalized flow splits.

    For every position in ``divdict`` the weight of the exon itself (wt1)
    and the weights of the edges in its flow list are read through
    ``self.getedgevalue``.  The summed edge weight (wt2) and the
    normalized per-edge vector are stored in the returned dict.

    Input layout::

        divdict[position] = [[outgoingflg, exonstartflg], [exon, flowlist]]

    ``outgoingflg`` is added to 10 to form the exon edge type (10 or 11);
    the original notes describe it as incoming/outgoing = 0/1.

    ``exonstartflg`` selects how ``flowlist`` is read:

    * 0 -- every entry is looked up as a splice (edge type 3).
    * 1 -- the first entry is looked up with the exon edge type, the
      remaining entries as splices.
    * 2 -- transcript start: ``flowlist`` must hold exactly one entry
      (the previous exon); the flow is split into that exon's weight and
      the non-negative surplus ``wt1 - weight``.
    * 3 -- transcript end: same handling as 2, with the single entry
      being the next exon.
    * 4, 5 -- described by the original notes as transcript start/end
      without an exon start (in-splice / out-splice).  No branch below
      handles them, so such positions produce no output entry.

    Edge types understood by ``getedgevalue`` according to the original
    notes: 10/11 = exon, 20/21 = intron, 3 = splice.

    Returns::

        flowdict[position] = [[outgoingflg, exonstartflg],
                              [[wt1, wt2], nflowvec]]

    Positions whose flow vector ends up empty are left out.
    """
    result = {}
    for position, entry in divdict.items():
        outgoingflg, exonstartflg = entry[0]
        exon, flowlist = entry[1]

        exon_edgetype = 10 + outgoingflg
        wt1 = self.getedgevalue(exon, exon_edgetype)[0]

        at_start = exonstartflg == 2
        if at_start or exonstartflg == 3:
            # Transcript boundary: exactly one neighbouring exon expected.
            if len(flowlist) != 1:
                if at_start:
                    message = 'Transcript Start Exon has incoming splice; Flowlist : %s, %s' % (
                        str(exon), common.fl2str(flowlist))
                else:
                    message = 'Transcript End Exon has outgoing splice; Flowlist : %s, %s' % (
                        str(exon), common.fl2str(flowlist))
                # func_name() is called from this frame on purpose so the
                # reported name stays that of this method.
                common.printstatus(message, 'W', common.func_name())
                continue

            neighbour = flowlist[0]
            neighbour_wt = self.getedgevalue(neighbour, exon_edgetype)[0]
            surplus = wt1 - neighbour_wt
            # A negative surplus is clamped to zero and reported below.
            flowvec = [neighbour_wt, max(surplus, 0)]
            if surplus < 0:
                if at_start:
                    message = 'Flow decreases at transcript start exon: %s; Prev Exon: %s; Weight Start=%10.4f, Before=%10.4f' % (
                        str(exon), common.fl2str(neighbour), wt1, neighbour_wt)
                else:
                    message = 'Flow increases at transcript end exon %s: Prev Exon: %s; Weight End=%10.4f, After=%10.4f' % (
                        str(exon), common.fl2str(neighbour), wt1, neighbour_wt)
                common.printstatus(message, 'W', common.func_name())
        elif exonstartflg == 1:
            # First entry is an exon edge, the rest are splices.
            head_wt = self.getedgevalue(flowlist[0], exon_edgetype)[0]
            flowvec = [head_wt] + [
                self.getedgevalue(splice, 3)[0] for splice in flowlist[1:]
            ]
        elif exonstartflg == 0:
            flowvec = [self.getedgevalue(splice, 3)[0] for splice in flowlist]
        else:
            # Flags without a dedicated branch contribute nothing.
            continue

        if not flowvec:
            continue

        wt2 = sum(flowvec)
        nflowvec = common.normalize_vector(flowvec)
        result[position] = [[outgoingflg, exonstartflg], [[wt1, wt2], nflowvec]]
    return result
cfg.jsddistribution, cfg.readcount, cfg.fold_flg) genetxreaddict1 = dict([ (gene, [genetwoexpressions[gene][0], genetwoexpressions[gene][2]]) for gene in genetwoexpressions ]) genetxreaddict2 = dict([ (gene, [genetwoexpressions[gene][1], genetwoexpressions[gene][3]]) for gene in genetwoexpressions ]) jsdmetadatafile = '%s/expression_jsd.txt' % (folderdict['metadata']) fout = open(jsdmetadatafile, 'w') for gene in genetwoexpressions: fout.write('%s\t% 7d\t% 7d\t%s\t%s\t%6.4f\n' % (gene, genetwoexpressions[gene][0], genetwoexpressions[gene][1], common.fl2str(genetwoexpressions[gene][2]), common.fl2str(genetwoexpressions[gene][3]), genetwoexpressions[gene][4])) fout.close() #Todo Plot sample_id = 1 for replicate_id in range(1, cfg.numdatasets + 1): gensimulatedreadsdata(sample_id, replicate_id, folderdict, genetxreaddict1, genetxcigardict, cfg) sample_id = 2 for replicate_id in range(1, cfg.numdatasets + 1): gensimulatedreadsdata(sample_id, replicate_id, folderdict, genetxreaddict2, genetxcigardict, cfg) # if debug_flg==0: # deletesamfiles(folderdict)