Пример #1
0
            gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict,genetxcigardict,cfg)
    if cfg.numtypes==1:
        genetxreaddict=getsimulatedoneexpression(genetxcountdict,cfg.readcount)
        for replicate_id in range(1,cfg.numdatasets+1):
            sample_id=1
            gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict,genetxcigardict,cfg)
    if cfg.numtypes==2:
        #print cfg.coverageratiodistribution,cfg.jsddistribution
        genetwoexpressions=getsimulatedtwoexpressions(genetxcountdict,cfg.coverageratiodistribution,cfg.jsddistribution,cfg.readcount,cfg.fold_flg)
        genetxreaddict1=dict([(gene,[genetwoexpressions[gene][0],genetwoexpressions[gene][2]]) for gene in genetwoexpressions])
        genetxreaddict2=dict([(gene,[genetwoexpressions[gene][1],genetwoexpressions[gene][3]]) for gene in genetwoexpressions])
        jsdmetadatafile='%s/expression_jsd.txt'%(folderdict['metadata'])
        fout=open(jsdmetadatafile,'w')
        for gene in genetwoexpressions:
            fout.write('%s\t% 7d\t% 7d\t%s\t%s\t%6.4f\n'%(gene,genetwoexpressions[gene][0],genetwoexpressions[gene][1],
                                                          common.fl2str(genetwoexpressions[gene][2]),common.fl2str(genetwoexpressions[gene][3]),
                                                          genetwoexpressions[gene][4]))
        fout.close()
        #Todo Plot
        sample_id=1
        for replicate_id in range(1,cfg.numdatasets+1):
            gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict1,genetxcigardict,cfg)            
        sample_id=2
        for replicate_id in range(1,cfg.numdatasets+1):
            gensimulatedreadsdata(sample_id,replicate_id,folderdict,genetxreaddict2,genetxcigardict,cfg)                
    
#    if debug_flg==0:
#        deletesamfiles(folderdict)
    #print folderdict
    deletesamfiles(folderdict)
    
Пример #2
0
 def divdictToflow(self,divdict):
     '''
     Convert a divergence dictionary into per-position normalized flow vectors.

     Input layout (as used by this method):
       divdict[position]=[[outgoingflg,exonstartflg],[exon,flowlist]]
         outgoingflg  : incoming/outgoing = 0/1
         exonstartflg : 0=no, 1=yes,
                        2=start transcript and exonstart,
                        3=end transcript and exonstart,
                        4=start transcript and no exonstart (insplice),
                        5=end transcript and no exonstart (outsplice)
         flowlist     : [exon/splicelist]; len=1 is expected for start or end transcript
     wtgraphstruct=(exonlist,intronlist,splicelist,startnodelist,endnodelist,novelnodelist,exonwtlist,intronwtlist,splicewtlist)
     getedgevalue edgetype = 10/11,20/21,3 = exon,intron,splice

     Output layout:
       flowdict[position]=[[outgoingflg,exonstartflg],[[wt1,wt2],nflowvec]]
         wt1      : first element of getedgevalue(exon,10+outgoingflg)
         wt2      : sum of the un-normalized flow vector
         nflowvec : common.normalize_vector(flowvec)

     Positions for which no flow vector can be built are left out of the result:
     exonstartflg 4/5 (no branch handles them here), a start/end transcript exon whose
     flowlist length is not 1 (a warning is printed), or an empty flowlist.
     '''
     flowdict={}
     for position,entry in divdict.items():
         outgoingflg,exonstartflg=entry[0]
         exon,flowlist=entry[1]
         #exon edge type in the requested direction (10+0 or 10+1, see docstring);
         #the same type is used for the exon itself and for neighbouring exons
         exonedgetype=10+outgoingflg
         wt1=self.getedgevalue(exon,exonedgetype)[0]
         flowvec=[]
         if exonstartflg==2:
             if len(flowlist)!=1:
                 message='Transcript Start Exon has incoming splice; Flowlist : %s, %s'%(str(exon),common.fl2str(flowlist))
                 common.printstatus(message,'W',common.func_name())
             else:
                 #incoming flow: flowlist[0] is the previous exon
                 wtoth=self.getedgevalue(flowlist[0],exonedgetype)[0]
                 #second component is the flow that begins at this exon, clamped at 0
                 flowvec=[wtoth,max(wt1-wtoth,0)]
                 if wt1-wtoth<0:
                     message='Flow decreases at transcript start exon: %s; Prev Exon: %s; Weight Start=%10.4f, Before=%10.4f'%(str(exon),common.fl2str(flowlist[0]),wt1,wtoth)
                     common.printstatus(message,'W',common.func_name())
         elif exonstartflg==3:
             if len(flowlist)!=1:
                 message='Transcript End Exon has outgoing splice; Flowlist : %s, %s'%(str(exon),common.fl2str(flowlist))
                 common.printstatus(message,'W',common.func_name())
             else:
                 #outgoing flow: flowlist[0] is the next exon
                 wtoth=self.getedgevalue(flowlist[0],exonedgetype)[0]
                 #second component is the flow that ends at this exon, clamped at 0
                 flowvec=[wtoth,max(wt1-wtoth,0)]
                 if wt1-wtoth<0:
                     #label fixed: the other exon is the one after the transcript end, not before it
                     message='Flow increases at transcript end exon %s: Next Exon: %s; Weight End=%10.4f, After=%10.4f'%(str(exon),common.fl2str(flowlist[0]),wt1,wtoth)
                     common.printstatus(message,'W',common.func_name())
         elif exonstartflg==1:
             if len(flowlist)==0:
                 #guard: indexing flowlist[0] below would raise IndexError
                 message='Exon start has empty flowlist; Exon : %s'%(str(exon))
                 common.printstatus(message,'W',common.func_name())
             else:
                 #first entry is read as an exon edge, the remaining entries as splice edges
                 flowvec=[self.getedgevalue(flowlist[0],exonedgetype)[0]]
                 flowvec.extend([self.getedgevalue(flowedge,3)[0] for flowedge in flowlist[1:]])
         elif exonstartflg==0:
             #every entry is read as a splice edge
             flowvec=[self.getedgevalue(flowedge,3)[0] for flowedge in flowlist]
         if len(flowvec)>0:
             wt2=sum(flowvec)
             nflowvec=common.normalize_vector(flowvec)
             flowdict[position]=[[outgoingflg,exonstartflg],[[wt1,wt2],nflowvec]]
     return flowdict
Пример #3
0
            cfg.jsddistribution, cfg.readcount, cfg.fold_flg)
        genetxreaddict1 = dict([
            (gene, [genetwoexpressions[gene][0], genetwoexpressions[gene][2]])
            for gene in genetwoexpressions
        ])
        genetxreaddict2 = dict([
            (gene, [genetwoexpressions[gene][1], genetwoexpressions[gene][3]])
            for gene in genetwoexpressions
        ])
        jsdmetadatafile = '%s/expression_jsd.txt' % (folderdict['metadata'])
        fout = open(jsdmetadatafile, 'w')
        for gene in genetwoexpressions:
            fout.write('%s\t% 7d\t% 7d\t%s\t%s\t%6.4f\n' %
                       (gene, genetwoexpressions[gene][0],
                        genetwoexpressions[gene][1],
                        common.fl2str(genetwoexpressions[gene][2]),
                        common.fl2str(genetwoexpressions[gene][3]),
                        genetwoexpressions[gene][4]))
        fout.close()
        #Todo Plot
        sample_id = 1
        for replicate_id in range(1, cfg.numdatasets + 1):
            gensimulatedreadsdata(sample_id, replicate_id, folderdict,
                                  genetxreaddict1, genetxcigardict, cfg)
        sample_id = 2
        for replicate_id in range(1, cfg.numdatasets + 1):
            gensimulatedreadsdata(sample_id, replicate_id, folderdict,
                                  genetxreaddict2, genetxcigardict, cfg)

#    if debug_flg==0:
#        deletesamfiles(folderdict)