def computeSNVTreeError(snvMatrix, cMatrix, lafMatrix, realTree):
    """Score a tree inferred from pairwise somatic-variant distances.

    Every cell of ``cMatrix`` is wrapped in a ``DummyCMu`` holding a ``C``
    object, a ``Sample`` is assembled for each column, the pairwise distance
    matrix is filled through ``SomaticVariantDistance``, and the minimum
    spanning tree built from that matrix is compared against ``realTree``.

    Parameters:
        snvMatrix: 2-D array indexed as ``[:, sample]``; its second dimension
            determines the number of samples.
        cMatrix: 2-D array whose entries are convertible with ``int``; column
            ``i`` supplies the per-position C values of sample ``i``.
        lafMatrix: 2-D array indexed as ``[:, sample]``; each column is passed
            to ``LAF`` as the measurement values of that sample.
        realTree: reference tree; its ``vertices`` attribute is used to build
            the graph and it is handed to the error handler for comparison.

    Returns:
        Whatever ``SimulationErrorHandler.computeTreeError`` returns for the
        single inferred MST and ``realTree``.
    """
    sampleNum = snvMatrix.shape[1]
    rowCount, colCount = cMatrix.shape[0], cMatrix.shape[1]

    # Wrap every C value in its own DummyCMu so that each column can later be
    # used directly as a sample's bestCMu.
    cObjMatrix = np.empty(cMatrix.shape, dtype=object)
    for row in range(rowCount):
        for col in range(colCount):
            # The empty second argument stops C from initializing its allele
            # combinations.
            wrappedC = C([2, int(cMatrix[row][col])], [])
            holder = DummyCMu()
            holder.c = wrappedC
            cObjMatrix[row, col] = holder

    chromosomes, positions, variantIndices = obtainSomaticVariantIndices()

    def buildSample(column):
        # Assemble a fresh Sample carrying the CMu column, the somatic
        # variants and the LAF measurements of the given sample index.
        sampleObj = Sample(None, None)
        sampleObj.bestCMu = cObjMatrix[:, column]
        sampleObj.somaticVariants = snvMatrix[:, column]
        sampleObj.somaticVariantsInd = variantIndices
        # positions is passed twice: once as the third and once as the fourth
        # LAF argument.
        sampleObj.measurements = LAF(lafMatrix[:, column], chromosomes,
                                     positions, positions)
        return sampleObj

    # Pairwise distances between all samples (including each sample with
    # itself); new Sample objects are created for every pair.
    distanceMatrix = np.empty([sampleNum, sampleNum], dtype=float)
    for sample1 in range(sampleNum):
        for sample2 in range(sampleNum):
            firstObj = buildSample(sample1)
            secondObj = buildSample(sample2)
            # The first element of the result (messages) is not used here.
            _, dist = SomaticVariantDistance(
            ).computeDistanceBetweenSomaticVariants(firstObj, secondObj,
                                                    sample1, sample2)
            distanceMatrix[sample1, sample2] = dist

    # Build the full graph, reduce it to an MST and score it.
    vertices = realTree.vertices
    fullGraph = generateInitialTree(distanceMatrix, vertices)
    mst = computeMST(fullGraph, realTree.vertices)
    return SimulationErrorHandler().computeTreeError([mst], realTree)
Пример #2
0
# Set up the precursor sample. Chromosome, start and end information is taken
# from the first entry of tmpSamples; every LAF value is fixed at 0.5.
templateSample = tmpSamples[0]
templateMeasurements = templateSample.measurements
healthySample.measurements = LAF([0.5] * measurementLength,
                                 templateMeasurements.chromosomes,
                                 templateMeasurements.starts,
                                 templateMeasurements.ends)
healthySample.somaticVariants = [0] * somVarNum
healthySample.somaticVariantsInd = templateSample.somaticVariantsInd
healthySample.setParent(None)
# Named 'Precursor' rather than healthy because it may also be a 4N precursor.
healthySample.name = 'Precursor'

# Dummy bestCMu for the precursor: one CMuCombination instance that is
# repeated for every measurement position.
eventDistances = targetClone.eventDistances
precursorC = C([2, precursorPloidy])
precursorMu = Mu(precursorTumorFrequency)
bestCMuHealthy = CMuCombination(precursorC, precursorMu, eventDistances)

healthySample.bestCMu = [bestCMuHealthy] * measurementLength
# originalCMu is assigned from bestCMu directly (no copy is made here).
healthySample.originalCMu = healthySample.bestCMu

# Full sample list, with the precursor in front.
samples = [healthySample] + tmpSamples

# Collect the somatic variants of all samples in one numpy array with one
# column per sample, and attach the segmentation to each sample's
# measurements in the same pass.
allSomaticVariants = np.zeros((somVarNum, len(samples)))
for sample, currentSample in enumerate(samples):
    allSomaticVariants[:, sample] = currentSample.somaticVariants
    currentSample.measurements.segmentation = segmentation

targetClone.segmentation = segmentation
#The run part will also draw the trees automatically.