Пример #1
0
def runSample(params):
        #retrieving the given parameter
        ccs=params[0]
        tree=params[1]
        extantAdjacencies=params[2]
        adjacencyProbs=params[3]
        alpha=params[4]
        i=params[5]
        extantAdjacencies_species_adj=params[6]
        outputDirectory=params[7]
        reconstructedMarkerCount=params[8]
        allSampleReconstructionStatistic={}
        dict_SCJ={}
        #lock = multiprocessing.Lock()
        #output text log
        outLog="Sample: "+str(i)+"\n"

        #start sampling method like in the Main.py
        outLog+="Enumerate joint labelings...\n"
        jointLabels, first = SR.enumJointLabelings(ccs)
        outLog+="Check valid labels...\n"
        validLabels, validAtNode = SR.validLabels(jointLabels, first)

        #lock.acquire()
        outLog+= "Compute ancestral labels with SR...\n"
        topDown = SR.sampleLabelings(tree, ccs, validAtNode, extantAdjacencies, adjacencyProbs, alpha)
        #lock.release()
        reconstructedAdj = SR.reconstructedAdjacencies(topDown)
        SR.outputReconstructedAdjacencies(reconstructedAdj, outputDirectory+"/reconstructed_adjacencies_" + str(i))

        for node in reconstructedAdj:
            # count for each adjaency on each internal node, how often this adjacencies over all samples occurs there
            for adjacency in reconstructedAdj[node]:
                #lock.acquire()
                if (node,adjacency) in allSampleReconstructionStatistic:
                    allSampleReconstructionStatistic[(node,adjacency)] += 1
                else:
                    allSampleReconstructionStatistic.update({(node,adjacency):1})
                #lock.release()
        outLog+="Scaffolding...\n"
        scaffolds = scaffolding.scaffoldAdjacencies(reconstructedAdj)
        undoubled = scaffolding.undoubleScaffolds(scaffolds)
        scaffolding.outputUndoubledScaffolds(undoubled, outputDirectory+"/undoubled_scaffolds_" + str(i))
        scaffolding.outputScaffolds(scaffolds, outputDirectory+"/doubled_scaffolds_" + str(i))
        log=scaffolding.sanityCheckScaffolding(undoubled)
        outLog+=log
        for node in undoubled:
            outLog+= str(node)+'\n'
            markerCounter = 0
            for scaffold in undoubled[node]:
                first = scaffold[0]
                last = scaffold[-1]
                if not first == last:
                    markerCounter = markerCounter + len(scaffold)
                else:
                    markerCounter = markerCounter + len(scaffold) - 1
            outLog+= str(node) + " number of reconstructed undoubled marker in scaffolds: " + str(markerCounter)+'\n'
            # number of reconstructed markerIds given by reconstructedMarkerCount
            # singleton scaffolds number / number of not reconstructed marker
            notReconstructedMarkerCount = reconstructedMarkerCount - markerCounter
            # number of all scaffolds
            allScaffoldCount = len(undoubled[node]) + notReconstructedMarkerCount
            outLog+= str(node) + " number of singleton scaffolds (not reconstructed marker): " + str(
                notReconstructedMarkerCount)+'\n'
            outLog+= str(node) + " number of scaffolds: " + str(allScaffoldCount)+'\n'


        #lock.acquire()
        scj = calculate_SCJ(tree, reconstructedAdj, extantAdjacencies_species_adj)
        outLog+="Single-Cut-or-Join-Distance: " + str(scj)+'\n'
        dict_SCJ.update({'Sample_' + str(i): scj})
        #lock.release()
        return (allSampleReconstructionStatistic,dict_SCJ,outLog)
Пример #2
0
            if not first == last:
                markerCounter = markerCounter + len(scaffold)
            else:
                markerCounter = markerCounter + len(scaffold)-1
        print node+" number of reconstructed undoubled marker in scaffolds: "+str(markerCounter)
        # number of reconstructed markerIds
        reconstructedMarkerCount = len(reconstructedMarker)
        # singleton scaffolds number / number of not reconstructed marker
        notReconstructedMarkerCount = reconstructedMarkerCount - markerCounter
        # number of all scaffolds
        allScaffoldCount = len(undoubled[node]) + notReconstructedMarkerCount
        print node + " number of singleton scaffolds (not reconstructed marker): " + str(notReconstructedMarkerCount)
        print node + " number of scaffolds: " + str(allScaffoldCount)

        #calculate SCJ-distance for unsampled solution
        scj_unsampled=calculate_SCJ(tree, reconstructedAdj, extantAdjacencies_species_adj)
        dict_SCJ.update({'Unsampled':scj_unsampled})

print time.time() - t0, "seconds process time"
t_sampling=time.time()

#dictionary for statistics of reconstructed Adjacencies
#structure: >internal node  adjacency   number of how often this adj was reconstructed at this node among all samples
allSampleReconstructionStatistic={}

#Sampling
#only if a number of samples is given and if the sript is called as standalone (not imported) script
if args.sampling and  __name__ == '__main__':
    print "SAMPLING"
    # reconstruct marker pairs out of extantAdjacencies
    reconstructedMarker = set()