示例#1
0
def computeFromSeq(seqs, subseqLength=1):
    """Label each element of the concatenated seqs with its source seq index.

    Each sequence in ``seqs`` contributes ``len(seq) - subseqLength + 1``
    entries (one per length-``subseqLength`` window), every one of them equal
    to that sequence's position within ``seqs``.

    Parameters
    ----------
    seqs: collection of sequences, or a single flat sequence
        the sequences that are (conceptually) concatenated
    subseqLength: int
        window length; a sequence shorter than this contributes no entries

    Returns
    -------
    fromSeq: 1D array of int
        fromSeq[i] is the index of the sequence that element i came from

    >>> s = [[1,2],[3,4,5]]
    >>> computeFromSeq(s)
    array([0, 0, 1, 1, 1])
    >>> computeFromSeq(s, subseqLength=2)
    array([0, 1, 1])
    >>> computeFromSeq([1,2])
    array([0, 0])
    """
    # just one seq -> array of all 0s
    # NOTE(review): both single-seq shortcuts ignore subseqLength and return
    # one entry per element; kept as-is for compatibility -- confirm intent.
    if isScalar(seqs[0]):
        return np.zeros(len(seqs), dtype=int)
    if len(seqs) == 1:
        return np.zeros(len(seqs[0]), dtype=int)

    # A list comprehension (not map) so this also works on Python 3, where
    # map() is lazy and np.array(map(...)) yields a useless 0-d object array.
    windowCounts = np.array([len(seq) - subseqLength + 1 for seq in seqs])
    # Too-short sequences have no windows; a negative count would otherwise
    # corrupt the layout of every sequence after it.
    windowCounts = np.maximum(windowCounts, 0)

    # repeat seq index i exactly windowCounts[i] times, in order
    return np.repeat(np.arange(len(seqs)), windowCounts)
示例#2
0
文件: subseq.py 项目: dblalock/flock
def computeFromSeq(seqs, subseqLength=1):
	"""Label each element of the concatenated seqs with its source seq index.

	Each sequence in ``seqs`` contributes ``len(seq) - subseqLength + 1``
	entries (one per length-``subseqLength`` window), every one of them equal
	to that sequence's position within ``seqs``.

	Parameters
	----------
	seqs: collection of sequences, or a single flat sequence
		the sequences that are (conceptually) concatenated
	subseqLength: int
		window length; a sequence shorter than this contributes no entries

	Returns
	-------
	fromSeq: 1D array of int
		fromSeq[i] is the index of the sequence that element i came from

	>>> s = [[1,2],[3,4,5]]
	>>> computeFromSeq(s)
	array([0, 0, 1, 1, 1])
	>>> computeFromSeq(s, subseqLength=2)
	array([0, 1, 1])
	>>> computeFromSeq([1,2])
	array([0, 0])
	"""
	# just one seq -> array of all 0s
	# NOTE(review): both single-seq shortcuts ignore subseqLength and return
	# one entry per element; kept as-is for compatibility -- confirm intent.
	if isScalar(seqs[0]):
		return np.zeros(len(seqs), dtype=int)
	if len(seqs) == 1:
		return np.zeros(len(seqs[0]), dtype=int)

	# A list comprehension (not map) so this also works on Python 3, where
	# map() is lazy and np.array(map(...)) yields a useless 0-d object array.
	windowCounts = np.array([len(seq) - subseqLength + 1 for seq in seqs])
	# Too-short sequences have no windows; a negative count would otherwise
	# corrupt the layout of every sequence after it.
	windowCounts = np.maximum(windowCounts, 0)

	# repeat seq index i exactly windowCounts[i] times, in order
	return np.repeat(np.arange(len(seqs)), windowCounts)
示例#3
0
def optimalAlignK(scores, m, k):
    """
    Given an array of scores, return the indices I of the k best scores such
    that for all i, j in I, i != j -> |i - j| >= m; in other words, the
    indices must be at least m apart. "Best" means the largest total score,
    found by dynamic programming over (index, number of picks so far).

    Parameters
    ----------
    scores: 1D, array-like
        an ordered collection of scores
    m: int
        minimum spacing between reported indices
    k: int or array-like of int
        number of indices to return; every value must be >= 1

    Returns
    -------
    idxs: an array of indices for each k value specified (a single array
        or a list thereof, depending on whether k is an int or a collection).
        Results are ordered by ascending k, since k is sorted internally.
        An entry is an empty list when no selection of that size has a
        positive total score. Special cases:

        - if len(scores) <= m or the largest k is 1, a 0-d int array
          holding argmax(scores) is returned, even if k was a collection
        - if all scores are <= 0, a warning is printed and a list with one
          empty list per k is returned

    Raises
    ------
    RuntimeError: if scores is None or empty, if k is None, or if any
        requested k is < 1

    >>> s = [2,1,4,3]
    >>> optimalAlignK(s, 2, 1)
    array(2)
    >>> optimalAlignK(s, 2, 2)
    array([0, 2])
    >>> optimalAlignK(s, 3, 2)
    array([0, 3])
    >>> optimalAlignK(s, 4, 2)
    array(2)
    >>> optimalAlignK(s, 2, [1, 2])
    [array([2]), array([0, 2])]
    >>> s2 = [2,1,4,3,1,7,1]
    >>> optimalAlignK(s2, 2, [2, 3])
    [array([2, 5]), array([0, 2, 5])]
    >>> optimalAlignK(s2, 3, [1, 2, 3])
    [array([5]), array([2, 5]), array([0, 3, 6])]
    >>> s3 = [2,1,4,3,1,7,-99]
    >>> optimalAlignK(s3, 3, [3])
    []
    """
    # ------------------------ err handling and arg munging
    if scores is None or not len(scores):
        raise RuntimeError("No scores given!")
    if k is None:
        raise RuntimeError("Number of locations to return must be >= 1")
    if isScalar(k):
        k = (k, )
    k = np.sort(np.asarray(k))
    kmax = np.max(k)
    # k is sorted, so k[0] is the smallest request. Checking it (rather than
    # only kmax) rejects mixed input such as [0, 2], where kk - 1 == -1 would
    # silently index the last column and return the answer for kmax.
    if k[0] < 1:
        raise RuntimeError("Number of locations to return must be >= 1")

    n = len(scores)
    if n <= m or kmax == 1:
        # room for only one index, or only one requested: take the best score
        return np.array(np.argmax(scores), dtype=int)

    scores = np.asarray(scores)
    if np.all(scores <= 0.):
        print("Warning: optimalAlignK(): all scores <= 0")
        return [[] for kk in k]

    # ------------------------ find best score and parent for each k at each idx

    # c[i, j]: best cumulative score of j+1 picks whose last pick is index i
    # parentIdxs[i, j]: the pick preceding i in that selection (-1 if none)
    historyShape = (n, kmax)
    c = np.full(historyShape, -1.0)
    c[:m, 0] = scores[:m]  # the first m points can only be a first pick
    parentIdxs = np.full(historyShape, -1, dtype=int)

    # bestScores[j] / bestIdxs[j]: best positive c[:, j] among indices at
    # least m behind the current one, and where it was achieved
    bestScores = np.zeros(kmax)
    bestIdxs = np.full(kmax, -1, dtype=int)
    for i in range(m, n):
        oldIdx = i - m  # newest index that is far enough away from i
        betterForTheseK = c[oldIdx] > bestScores
        bestScores[betterForTheseK] = c[oldIdx, betterForTheseK]
        bestIdxs[betterForTheseK] = oldIdx
        # extend the best j-pick selection by i to get a (j+1)-pick selection
        parentIdxs[i, 1:] = bestIdxs[:-1]
        c[i, 1:] = bestScores[:-1] + scores[i]
        c[i, 1:] *= parentIdxs[i, 1:] >= 0  # only valid parents
        c[i, 0] = scores[i]

    # ------------------------ compute best set of idxs for each value of k
    allParents = []
    for kk in k:
        kIdx = kk - 1
        parent = np.argmax(c[:, kIdx])
        if c[parent, kIdx] <= 0.:
            # no selection of kk indices with a positive total score
            allParents.append([])
            continue
        # walk the parent pointers back to the first pick
        parents = []
        while parent >= 0:
            parents.append(parent)
            parent = parentIdxs[parent, kIdx]
            kIdx -= 1
        # collected last-to-first; reverse into increasing index order
        allParents.append(np.array(parents, dtype=int)[::-1])

    if len(k) == 1:
        return allParents[0]
    return allParents
示例#4
0
文件: subseq.py 项目: dblalock/flock
def optimalAlignK(scores, m, k):
	"""
	Given an array of scores, return the indices I of the k best scores such
	that for all i, j in I, i != j -> |i - j| >= m; in other words, the
	indices must be at least m apart. "Best" means the largest total score,
	found by dynamic programming over (index, number of picks so far).

	Parameters
	----------
	scores: 1D, array-like
		an ordered collection of scores
	m: int
		minimum spacing between reported indices
	k: int or array-like of int
		number of indices to return; every value must be >= 1

	Returns
	-------
	idxs: an array of indices for each k value specified (a single array
		or a list thereof, depending on whether k is an int or a collection).
		Results are ordered by ascending k, since k is sorted internally.
		An entry is an empty list when no selection of that size has a
		positive total score. Special cases:

		- if len(scores) <= m or the largest k is 1, a 0-d int array
		  holding argmax(scores) is returned, even if k was a collection
		- if all scores are <= 0, a warning is printed and a list with one
		  empty list per k is returned

	Raises
	------
	RuntimeError: if scores is None or empty, if k is None, or if any
		requested k is < 1

	>>> s = [2,1,4,3]
	>>> optimalAlignK(s, 2, 1)
	array(2)
	>>> optimalAlignK(s, 2, 2)
	array([0, 2])
	>>> optimalAlignK(s, 3, 2)
	array([0, 3])
	>>> optimalAlignK(s, 4, 2)
	array(2)
	>>> optimalAlignK(s, 2, [1, 2])
	[array([2]), array([0, 2])]
	>>> s2 = [2,1,4,3,1,7,1]
	>>> optimalAlignK(s2, 2, [2, 3])
	[array([2, 5]), array([0, 2, 5])]
	>>> optimalAlignK(s2, 3, [1, 2, 3])
	[array([5]), array([2, 5]), array([0, 3, 6])]
	>>> s3 = [2,1,4,3,1,7,-99]
	>>> optimalAlignK(s3, 3, [3])
	[]
	"""
	# ------------------------ err handling and arg munging
	if scores is None or not len(scores):
		raise RuntimeError("No scores given!")
	if k is None:
		raise RuntimeError("Number of locations to return must be >= 1")
	if isScalar(k):
		k = (k,)
	k = np.sort(np.asarray(k))
	kmax = np.max(k)
	# k is sorted, so k[0] is the smallest request. Checking it (rather than
	# only kmax) rejects mixed input such as [0, 2], where kk - 1 == -1 would
	# silently index the last column and return the answer for kmax.
	if k[0] < 1:
		raise RuntimeError("Number of locations to return must be >= 1")

	n = len(scores)
	if n <= m or kmax == 1:
		# room for only one index, or only one requested: take the best score
		return np.array(np.argmax(scores), dtype=int)

	scores = np.asarray(scores)
	if np.all(scores <= 0.):
		print("Warning: optimalAlignK(): all scores <= 0")
		return [[] for kk in k]

	# ------------------------ find best score and parent for each k at each idx

	# c[i, j]: best cumulative score of j+1 picks whose last pick is index i
	# parentIdxs[i, j]: the pick preceding i in that selection (-1 if none)
	historyShape = (n, kmax)
	c = np.full(historyShape, -1.0)
	c[:m, 0] = scores[:m]  # the first m points can only be a first pick
	parentIdxs = np.full(historyShape, -1, dtype=int)

	# bestScores[j] / bestIdxs[j]: best positive c[:, j] among indices at
	# least m behind the current one, and where it was achieved
	bestScores = np.zeros(kmax)
	bestIdxs = np.full(kmax, -1, dtype=int)
	for i in range(m, n):
		oldIdx = i - m  # newest index that is far enough away from i
		betterForTheseK = c[oldIdx] > bestScores
		bestScores[betterForTheseK] = c[oldIdx, betterForTheseK]
		bestIdxs[betterForTheseK] = oldIdx
		# extend the best j-pick selection by i to get a (j+1)-pick selection
		parentIdxs[i, 1:] = bestIdxs[:-1]
		c[i, 1:] = bestScores[:-1] + scores[i]
		c[i, 1:] *= parentIdxs[i, 1:] >= 0  # only valid parents
		c[i, 0] = scores[i]

	# ------------------------ compute best set of idxs for each value of k
	allParents = []
	for kk in k:
		kIdx = kk - 1
		parent = np.argmax(c[:, kIdx])
		if c[parent, kIdx] <= 0.:
			# no selection of kk indices with a positive total score
			allParents.append([])
			continue
		# walk the parent pointers back to the first pick
		parents = []
		while parent >= 0:
			parents.append(parent)
			parent = parentIdxs[parent, kIdx]
			kIdx -= 1
		# collected last-to-first; reverse into increasing index order
		allParents.append(np.array(parents, dtype=int)[::-1])

	if len(k) == 1:
		return allParents[0]
	return allParents