# Example #1
def neighbor_joining(dMtx, names):
    """
    Build a tree from a distance matrix using the neighbor-joining method.

    Parameters:
        dMtx:  square 2-D numpy array of pairwise distances between OTUs
               (operational taxonomic units). The matrix is not symmetric:
               missing entries are marked with ``inf`` and the distance of
               a pair is read from whichever of ``dMtx[a][b]`` /
               ``dMtx[b][a]`` is finite.
        names: sequence of OTU names; ``names[k]`` labels row/column ``k``.
               The sequence is copied, the caller's object is not modified.

    Returns:
        The ``NeighborJoiningNode`` created by the last merge (the root of
        the tree), or ``None`` when ``dMtx`` holds no finite distance.

    Relies on ``find_min``, ``NeighborJoiningNode`` and ``Edge`` defined
    elsewhere. ``find_min`` is expected to return ``(max_index, min_index)``
    of the best pair; ``Edge`` is presumably attaching itself to the two
    nodes it is given (its return value is not used here) -- TODO confirm.
    """

    def compute_s_measure(dMtx):
        """
        Computes 'S' measure matrix. 'S' measures distance from a given
        OTU (operational taxonomic unit) to all other OTUs:
        S(i) = (sum of finite distances in row i and column i) / (n - 2).

        Only called while the matrix has more than two rows, so the
        divisor is never zero.
        """
        nspecies = dMtx.shape[0]
        measures = []
        for i in range(nspecies):
            # matrix not symmetric, gather the OTU's row and column
            distances = concatenate((dMtx[i].ravel(), dMtx[:, [i]].ravel()))
            total = distances[isfinite(distances)].sum()
            measures.append(total / (nspecies - 2))
        return measures

    def compute_m_matrix(dMtx, sMeasure):
        """
        Computes 'M' measure matrix. 'M' measures distance between pairs
        using formula M(i, j) = D(i, j) - S(i) - S(j).

        Entries without a finite distance in dMtx stay at inf.
        """
        rows, cols = dMtx.shape
        mMtx = zeros(dMtx.shape)
        mMtx.fill(float("inf"))
        for i in range(rows):
            for j in range(cols):
                if isfinite(dMtx[i][j]):
                    mMtx[i][j] = dMtx[i][j] - sMeasure[i] - sMeasure[j]
        return mMtx

    def recompute_d_matrix(dMtx, bestPair):
        """
        Recomputes a distance matrix after joining the pair bestPair.

        Row and column bestPair[0] (the larger index) are removed; the
        row/column at bestPair[1] (the smaller index) is reused for the
        new ancestor u, with D(u, k) = (D(i, k) + D(j, k) - D(i, j)) / 2.
        Requires bestPair[0] > bestPair[1].
        """
        hi = bestPair[0]
        lo = bestPair[1]

        def dist(a, b):
            # Only one orientation of a pair holds a finite value.
            return dMtx[a][b] if isfinite(dMtx[a][b]) else dMtx[b][a]

        size = dMtx.shape[1]
        newMtx = delete(dMtx, hi, 0)      # delete a row
        newMtx = delete(newMtx, hi, 1)    # delete a col
        d_ij = dist(hi, lo)

        # correct column: rows below the ancestor in the new matrix
        for i in range(lo + 1, size - 1):
            # new index -> old index (everything at/after 'hi' shifted up)
            k = i + 1 if hi <= i else i
            newMtx[i][lo] = (dist(k, hi) + dist(k, lo) - d_ij) / 2.0

        # correct row: columns left of the ancestor; j < lo < hi, so the
        # old and the new index of these OTUs are the same
        for j in range(lo):
            newMtx[lo][j] = (dist(j, hi) + dist(j, lo) - d_ij) / 2.0

        return newMtx

    def node_for(name):
        # Reuse the node of an already joined subtree, otherwise make a leaf.
        if name in nodes:
            return nodes[name]
        return NeighborJoiningNode(name)

    # main loop
    names = list(names)   # work on a copy, entries are renamed and removed
    nodes = {}
    root = None

    # ndarray.any() instead of any(...): the builtin cannot reduce a 2-D
    # boolean array.
    while isfinite(dMtx).any():
        if dMtx.shape[0] > 2:
            sMeasure = compute_s_measure(dMtx)
            mMtx = compute_m_matrix(dMtx, sMeasure)
            minPair = find_min(mMtx)

            # minPair[0] is max. index, minPair[1] is min. index.
            # Recomputation of the distance matrix will remove minPair[0],
            # that is row and col with max. index of the best pair. To
            # retain node names in correct order, we remove the node name
            # in max. index, and rename the node name in min. index to
            # ancestor's name.
            matchNames = (names[minPair[0]], names[minPair[1]])
            ancestorName = "[%s + %s]" % matchNames
            names[minPair[1]] = ancestorName
            del names[minPair[0]]

            commonAncestor = NeighborJoiningNode(ancestorName)
            sndNode = node_for(matchNames[0])
            fstNode = node_for(matchNames[1])

            # Branch lengths: d_ij / 2 +/- (S_i - S_j) / 2
            d_ij = dMtx[minPair[0]][minPair[1]]
            s_i = sMeasure[minPair[1]]
            s_j = sMeasure[minPair[0]]
            Edge(commonAncestor, fstNode, 0.5 * d_ij + 0.5 * (s_i - s_j))
            Edge(commonAncestor, sndNode, 0.5 * d_ij + 0.5 * (s_j - s_i))

            nodes[ancestorName] = commonAncestor
            root = commonAncestor
            dMtx = recompute_d_matrix(dMtx, minPair)
        else:
            # Only two OTUs left: join them directly and stop. The matrix
            # is not reduced any further, so without the break the loop
            # would never end.
            d = dMtx[1][0]
            matchNames = (names[0], names[1])
            ancestorName = "[%s + %s]" % matchNames
            commonAncestor = NeighborJoiningNode(ancestorName)
            sndNode = node_for(matchNames[0])
            fstNode = node_for(matchNames[1])
            Edge(commonAncestor, fstNode, d)
            Edge(commonAncestor, sndNode, d)
            root = commonAncestor
            break

    return root
# Example #2
def upgma(distMtx, names):
    """
    Build a tree from a distance matrix by repeatedly joining the closest
    pair of items (UPGMA-style clustering).

    Parameters:
        distMtx: square 2-D numpy array of pairwise distances in lower
                 triangular form; missing entries are marked with ``inf``.
        names:   sequence of item names; ``names[k]`` labels row/column
                 ``k``. The sequence is copied, the caller's object is not
                 modified.

    Returns:
        The ``UPGMANode`` created by the last merge (the root of the tree),
        or ``None`` when ``distMtx`` holds no finite distance.

    Relies on ``find_min``, ``UPGMANode`` and ``Edge`` defined elsewhere.
    ``find_min`` is expected to return ``(row, col)`` of the smallest
    entry with ``row > col``; ``Edge`` is presumably attaching itself to
    the two nodes it is given (its return value is not used here) -- TODO
    confirm.
    """

    def recompute_matrix(dimMtx, fst, snd):
        """
        Return the distance matrix after merging items fst and snd.

        Row/column fst is eliminated; row/column snd represents the
        combined item, whose distance to any other item k is
        (D(fst, k) + D(snd, k)) / 2. All other distances are copied.
        Only the lower triangle of the result is filled, the rest is inf.
        """
        def pair_dist(a, b):
            # dimMtx not symmetric, so watch for Infs!
            if isfinite(dimMtx[a][b]):
                return dimMtx[a][b]
            return dimMtx[b][a]

        w, h = dimMtx.shape
        newMtx = zeros((w - 1, h - 1))
        newMtx.fill(float("inf"))

        # Old indices that survive, in order; the position in this list is
        # the index in the new matrix.
        kept = [k for k in range(h) if k != fst]

        for newRow, i in enumerate(kept):
            for newCol, j in enumerate(kept[:newRow]):
                if i == snd:
                    # row of the combined item
                    newMtx[newRow][newCol] = (pair_dist(fst, j) + pair_dist(snd, j)) / 2.0
                elif j == snd:
                    # column of the combined item
                    newMtx[newRow][newCol] = (pair_dist(i, fst) + pair_dist(i, snd)) / 2.0
                else:
                    newMtx[newRow][newCol] = dimMtx[i][j]

        return newMtx

    def node_for(name):
        # Reuse the node of an already merged subtree, otherwise make a leaf.
        if name in nodes:
            return nodes[name]
        return UPGMANode(name)

    names = list(names)   # work on a copy, entries are renamed and removed
    nodes = {}
    root = None

    # ndarray.any() instead of any(...): the builtin cannot reduce a 2-D
    # boolean array.
    while isfinite(distMtx).any():
        match = find_min(distMtx)
        dist = distMtx[match[0]][match[1]]
        distMtx = recompute_matrix(distMtx, match[1], match[0])

        matchNames = [names[match[1]], names[match[0]]]
        ancestorName = "[%s + %s]" % (matchNames[0], matchNames[1])
        # match[0] now stands for the combined item; match[1] was removed
        # from the matrix, so its name has to go as well to keep names and
        # matrix indices aligned.
        names[match[0]] = ancestorName
        del names[match[1]]

        commonAncestor = UPGMANode(ancestorName)
        fstNode = node_for(matchNames[0])
        sndNode = node_for(matchNames[1])

        # Both children hang at half the pair distance below the ancestor.
        Edge(commonAncestor, fstNode, dist / 2.0)
        Edge(commonAncestor, sndNode, dist / 2.0)

        nodes[ancestorName] = commonAncestor
        root = commonAncestor

    return root