def test_sampleLabelings(self): tree = Tree("(A:1,(B:1,(C:1,(E:1,D:1)Int_1:0.5[&&NHX:ancient=1])Int_2:0.5[&&NHX:ancient=0])Int_3:1)Root;", format=1) chrom = {} chrom["one"] = ["3","4"] species = {} species["C"] = chrom chrom = {} chrom["one"] = ["3","4"] species["D"] = chrom chrom = {} chrom["one"] = [] species["E"] = chrom chrom = {} chrom["one"] = [] species["A"] = chrom chrom = {} chrom["one"] = [] species["B"] = chrom adj = getAdjacencies.findAdjacencies(species) paths = getAdjacencies.findTreePaths(tree) internal,adjacenciesAncestral = getAdjacencies.assignAncestralAdjacencies(paths,adj,tree) graphs = globalAdjacencyGraph.createGraph(adj,adjacenciesAncestral) jointLabels, first = SR.enumJointLabelings(graphs) probs={"Int_1":{(6, 7):0.1},"Int_2":{(6, 7):0.1},"Int_3":{(6, 7):0.1},"Root":{(6, 7):0.1}} for i in range(0,10): validLabels, validAtNode = SR.validLabels(jointLabels,first) resolvedCCs = SR.sampleLabelings(tree, graphs, validAtNode, adj,probs, alpha=0) reconstructedAdj = SR.reconstructedAdjacencies(resolvedCCs) print reconstructedAdj
def test_enumJointLabelings(self):
    """Check the expected joint-labeling and valid-labeling counts for the fixture graph."""
    joint, first = SR.enumJointLabelings([self.graph])
    self.assertEqual(len(joint[self.graph]), 15)
    valid, validAtNode = SR.validLabels(joint, first)
    self.assertEqual(len(valid[self.graph]), 8)
def runSample(params):
    """Run one sampling iteration of the ancestral-adjacency reconstruction pipeline.

    params is a single packed tuple (worker-pool friendly calling convention):
      params[0] ccs                           -- connected components of the global adjacency graph
      params[1] tree                          -- phylogenetic tree
      params[2] extantAdjacencies             -- adjacencies observed in extant species
      params[3] adjacencyProbs                -- per-node adjacency probabilities
      params[4] alpha                         -- sampling parameter passed to SR.sampleLabelings
      params[5] i                             -- sample index (used in output file names / log)
      params[6] extantAdjacencies_species_adj -- per-species adjacency map for the SCJ distance
      params[7] outputDirectory               -- directory for the per-sample output files
      params[8] reconstructedMarkerCount      -- total number of reconstructable (undoubled) markers

    Side effects: writes reconstructed adjacencies and (un)doubled scaffold files
    for this sample into outputDirectory.

    Returns a tuple (allSampleReconstructionStatistic, dict_SCJ, outLog):
      - allSampleReconstructionStatistic: {(node, adjacency): count} for this sample
      - dict_SCJ: {'Sample_<i>': SCJ distance}
      - outLog: accumulated human-readable log text
    """
    # retrieving the given parameters (unpack the packed tuple)
    ccs=params[0]
    tree=params[1]
    extantAdjacencies=params[2]
    adjacencyProbs=params[3]
    alpha=params[4]
    i=params[5]
    extantAdjacencies_species_adj=params[6]
    outputDirectory=params[7]
    reconstructedMarkerCount=params[8]
    allSampleReconstructionStatistic={}
    dict_SCJ={}
    # NOTE(review): locking was apparently tried and abandoned; results are
    # returned to the caller instead of being written to shared state.
    #lock = multiprocessing.Lock()
    # output text log, accumulated and returned rather than printed (safe for worker processes)
    outLog="Sample: "+str(i)+"\n"
    # Same sampling sequence as in Main.py: enumerate -> validate -> sample top-down.
    outLog+="Enumerate joint labelings...\n"
    jointLabels, first = SR.enumJointLabelings(ccs)
    outLog+="Check valid labels...\n"
    validLabels, validAtNode = SR.validLabels(jointLabels, first)
    #lock.acquire()
    outLog+= "Compute ancestral labels with SR...\n"
    topDown = SR.sampleLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, alpha)
    #lock.release()
    reconstructedAdj = SR.reconstructedAdjacencies(topDown)
    SR.outputReconstructedAdjacencies(reconstructedAdj, outputDirectory+"/reconstructed_adjacencies_" + str(i))
    for node in reconstructedAdj:
        # count for each adjacency on each internal node, how often this adjacency occurs over all samples
        for adjacency in reconstructedAdj[node]:
            #lock.acquire()
            if (node,adjacency) in allSampleReconstructionStatistic:
                allSampleReconstructionStatistic[(node,adjacency)] += 1
            else:
                allSampleReconstructionStatistic.update({(node,adjacency):1})
            #lock.release()
    outLog+="Scaffolding...\n"
    # Chain the sampled adjacencies into scaffolds, then collapse doubled markers.
    scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
    undoubled = scaffolding.undoubleScaffolds(scaffolds)
    scaffolding.outputUndoubledScaffolds(undoubled, outputDirectory+"/undoubled_scaffolds_" + str(i))
    scaffolding.outputScaffolds(scaffolds, outputDirectory+"/doubled_scaffolds_" + str(i))
    log=scaffolding.sanityCheckScaffolding(undoubled)
    outLog+=log
    # Per-node scaffold statistics.
    for node in undoubled:
        outLog+= str(node)+'\n'
        markerCounter = 0
        for scaffold in undoubled[node]:
            # NOTE(review): 'first' here shadows the 'first' returned by
            # SR.enumJointLabelings above; harmless since the earlier value is
            # no longer needed at this point, but worth renaming.
            first = scaffold[0]
            last = scaffold[-1]
            if not first == last:
                markerCounter = markerCounter + len(scaffold)
            else:
                # presumably a circular scaffold repeats its first marker at the
                # end, so one element is not counted twice -- TODO confirm
                markerCounter = markerCounter + len(scaffold) - 1
        outLog+= str(node) + " number of reconstructed undoubled marker in scaffolds: " + str(markerCounter)+'\n'
        # number of reconstructed markerIds given by reconstructedMarkerCount
        # singleton scaffolds number / number of not reconstructed marker
        notReconstructedMarkerCount = reconstructedMarkerCount - markerCounter
        # number of all scaffolds: multi-marker scaffolds plus one singleton per missing marker
        allScaffoldCount = len(undoubled[node]) + notReconstructedMarkerCount
        outLog+= str(node) + " number of singleton scaffolds (not reconstructed marker): " + str( notReconstructedMarkerCount)+'\n'
        outLog+= str(node) + " number of scaffolds: " + str(allScaffoldCount)+'\n'
    #lock.acquire()
    # Single-Cut-or-Join distance between the sampled reconstruction and the extant adjacencies.
    scj = calculate_SCJ(tree, reconstructedAdj, extantAdjacencies_species_adj)
    outLog+="Single-Cut-or-Join-Distance: " + str(scj)+'\n'
    dict_SCJ.update({'Sample_' + str(i): scj})
    #lock.release()
    return (allSampleReconstructionStatistic,dict_SCJ,outLog)
adjacencyProbs[species]={adj:weight} line=f.readline() f.close() #dictionary for all scj distances dict_SCJ={} #compute CCs in global adjacency graph ccs = globalAdjacencyGraph.createGraph(extantAdjacencies,nodesPerAdjacency) if (not args.skip_first): conflicts = globalAdjacencyGraph.analyseConnectedComponents(ccs) globalAdjacencyGraph.outputConflicts(conflicts,args.output+"/conflicts") jointLabels, first = SR.enumJointLabelings(ccs) validLabels, validAtNode = SR.validLabels(jointLabels,first) topDown = SR.computeLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, args.alpha) reconstructedAdj = SR.reconstructedAdjacencies(topDown) SR.outputReconstructedAdjacencies(reconstructedAdj,args.output+"/reconstructed_adjacencies") for node in reconstructedAdj: print node print "Number of reconstructed adjacencies: "+str(len(reconstructedAdj[node])) scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj) undoubled = scaffolding.undoubleScaffolds(scaffolds) scaffolding.outputUndoubledScaffolds(undoubled,args.output+"/undoubled_scaffolds") scaffolding.outputScaffolds(scaffolds,args.output+"/doubled_scaffolds") scaffolding.sanityCheckScaffolding(undoubled)