Exemplos de maxSubarray em Python, exemplos de ff3.maxSubarray em Python

Exemplo n.º 1

0

Exibir arquivo

Arquivo: ff6.py Projeto: yiweichung/extract

def main():
    # np.random.seed(123)

    # ================================ consts for everything
    # consts for generating data
    # n = 1000
    n = 500
    # n = 300
    # length = 8
    # length = 16
    length = 32
    # length = 50
    # nInstances = 3
    exampleLengths = [55, 60, 65]
    # exampleLengths = [60, 60, 60]
    noiseStd = .5

    # consts for algorithm
    Lmin = max(20, length)  # only needed for optimalAlignK() spacing
    Lmax = 100  # loose upper bound on pattern length
    minSim = .5  # loose cutoff for what counts as similar

    # k0 = len(exampleLengths) # for version where we tell it k
    answerIdxs = None

    # ------------------------ synthetic data

    # seq = synth.randconst(n, std=noiseStd)
    # seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
    seq = synth.notSoRandomWalk(n,
                                std=noiseStd,
                                trendFilterLength=80,
                                lpfLength=16)
    seq = embedExamples(seq, exampleLengths)

    # ------------------------ msrc

    from ..datasets import read_msrc as msrc
    idxs = [2]
    # idxs = [0]
    downsampleBy = 2
    recordings = msrc.getRecordings(idxs=idxs)
    r = list(recordings)[0]
    # seq = r.data
    # seq = r.data[:, :40]
    # seq = r.data[:, 20:23]
    seq = r.data[:, 24:27]
    # seq = r.data[:, 20:27]
    print "orig seq shape", seq.shape
    seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
    print "downsampled seq shape", seq.shape
    length = max(8, Lmin / 2)
    Lmin = len(seq) / 20
    Lmax = len(seq) / 10
    # Lmax = len(seq) / 20
    # k0 = 10
    minSim = .5
    answerIdxs = r.gestureIdxs / downsampleBy
    # print "seq shape", seq.shape
    prePadLen = Lmax - length
    postPadLen = length - 1
    first = np.tile(seq[0], (prePadLen, 1))
    last = np.tile(seq[-1], (postPadLen, 1))
    seq = np.vstack(
        (first, seq, last))  # pad with fixed val to allow all window positions
    # ^ TODO pad simMat with zeros instead--this introduces fake subseqs
    answerIdxs += prePadLen
    # seq = np.vstack((seq, np.tile(flat, (length-1, 1)))) # lets it get the last rep
    # print "seq shape", seq.shape

    # r.plot()

    # plt.figure()
    # plt.plot(r.sampleTimes)

    # answerIdxs = r.gestureIdxs / downsampleBy
    # print r.gestureIdxs
    # print answerIdxs

    # plt.figure()
    # plt.plot(seq)
    # for idx in answerIdxs:
    # 	ax = plt.gca()
    # 	viz.plotVertLine(idx, ax=ax)
    # plt.show()

    # return

    # noise = synth.randconst(seq.shape) # add noise for debugging
    # seq = np.r_[noise, seq, noise]

    # ================================ simMat

    X = computeSimMat(seq, length)
    X[X < minSim] = 0.
    # Xorig = np.copy(X)
    X = ff2.localMaxFilterSimMat(X)
    Xblur = ff2.filterSimMat(X,
                             length - 1,
                             'hamming',
                             scaleFilterMethod='max1')
    # Xblur = ff2.filterSimMat(X, Lmin-1, 'hamming', scaleFilterMethod='max1')
    Xblur = np.minimum(Xblur, 1.)

    print "simMat dims:", X.shape
    Xnonzeros = np.count_nonzero(X)
    print "simMat nonzeros, total, frac = ", Xnonzeros, X.size, Xnonzeros / float(
        X.size)

    # ================================ plotting crap

    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    if answerIdxs is not None:
        for idx in answerIdxs:
            viz.plotVertLine(idx, ax=axSeq)
    Xpad = synth.appendZeros(X, length - 1)
    axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
    # im = axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
    # plt.colorbar(im, cax=axSim)

    axSeq.set_title("Time Series")
    axSim.set_title("Similarities Matrix")

    # plt.figure()
    # plt.imshow(Xorig, interpolation='nearest', aspect='auto')
    # plt.colorbar()

    # plt.figure()
    # plt.imshow(X, interpolation='nearest', aspect='auto')
    # plt.colorbar()

    # # plt.figure()
    # # Xfilt = ff2.localMaxFilterSimMat(X, allowEq=True)
    # # plt.imshow(Xfilt, interpolation='nearest', aspect='auto')
    # # plt.colorbar()

    # # plt.figure()
    # # Xfilt = ff2.localMaxFilterSimMat(X, allowEq=False)
    # # plt.imshow(Xfilt, interpolation='nearest', aspect='auto')
    # # plt.colorbar()

    # plt.figure()
    # plt.imshow(Xblur, interpolation='nearest', aspect='auto')
    # plt.colorbar()

    # plt.show()
    # return

    # ================================ science

    # ------------------------ derived stats
    kMax = int(X.shape[1] / Lmin + .5)
    windowLen = Lmax - length + 1
    # windowShape = (X.shape[0], Lmax)
    # windowSize = np.prod(windowShape)
    nLocs = X.shape[1] - windowLen + 1

    p0 = np.mean(X)  # fraction of entries that are 1 (roughly)

    # intersections = computeIntersections(X, windowLen)
    # windowSims = np.sum(intersections, axis=2)
    # colSims = np.dot(X.T, X)
    colSims = np.dot(X.T, Xblur)
    filt = np.zeros((windowLen, windowLen)) + np.diag(
        np.ones(windowLen))  # zeros except 1s on diag
    windowSims = sig.convolve2d(colSims, filt, mode='valid')

    windowVects = vectorizeWindowLocs(X, windowLen)
    windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

    plt.figure()
    plt.imshow(windowSims, interpolation='nearest', aspect='auto')

    # plt.show()
    # return

    # ------------------------ find stuff

    # #
    # # Version where we we tell it k
    # #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # # selfSims = np.diagonal(windowSims)
    # # candidateRowIdxs = np.where(selfSims * k0 <= bsfScore)[0]
    # # for i in candidateRowIdxs:
    # # 	row = windowSims[i]
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	if windowSims[i, i] * k0 <= bsfScore:
    # 		continue
    # 	idxs = sub.optimalAlignK(row, Lmin, k0)
    # 	intersection = windowVects[i]
    # 	sz = 0
    # 	for idx in idxs:
    # 		intersection = np.minimum(intersection, windowVectsBlur[idx])
    # 		sz = np.sum(intersection)
    # 		if sz * k0 <= bsfScore:
    # 			break
    # 	score = sz * k0
    # 	if score > bsfScore:
    # 		print("window {0}, k={1}, score={2} is the new best!".format(i, k0, score))
    # 		bsfScore = score
    # 		bsfLocs = idxs
    # 		bsfIntersection = np.copy(intersection)

    # #
    # # Version where we look for similarities to orig seq
    # #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	# early abandon if this location has so little stuff that no
    # 	# intersection with it can possibly beat the best score
    # 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
    # 		# print("immediately abandoning window {}!".format(i))
    # 		continue
    # 	# print("not abandoning window {}!".format(i))
    # 	# best combination of idxs such that none are within Lmin of each other
    # 	idxs = sub.optimalAlignment(row, Lmin)
    # 	# print i, ": ", idxs
    # 	# order idxs by descending order of associated score
    # 	sizes = windowSims[i, idxs]
    # 	sortedSizesOrder = np.argsort(sizes)[::-1]
    # 	sortedIdxs = idxs[sortedSizesOrder]
    # 	# iteratively intersect with another near neighbor, compute the
    # 	# associated score, and check if it's better (or if we can early abandon)
    # 	intersection = windowVects[i]
    # 	numIdxs = len(sortedIdxs)
    # 	for j, idx in enumerate(sortedIdxs):
    # 		k = j + 1
    # 		intersection = np.minimum(intersection, windowVectsBlur[idx])
    # 		sz = np.sum(intersection) # use apodization window
    # 		# sz = np.count_nonzero(intersection) # just max-pool
    # 		score = sz * k
    # 		if k > 1 and score > bsfScore:
    # 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 			print("sortedIdxs = {}".format(str(sortedIdxs)))
    # 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
    # 			print("------------------------")
    # 			bsfScore = score
    # 			bsfLocs = sortedIdxs[:k]
    # 			bsfIntersection = np.copy(intersection)
    # 		# early abandon if this can't possibly beat the best score, which
    # 		# is the case exactly when the intersection is so small that perfect
    # 		# matches at all future locations still wouldn't be good enough
    # 		elif sz * numIdxs <= bsfScore:
    # 			# print("early abandoning window {} at k={}".format(i, k))
    # 			break

    # #
    # # Version where we look for similarities to orig seq and use nearest
    # # enemy dist as M0
    # #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	# early abandon if this location has so little stuff that no
    # 	# intersection with it can possibly beat the best score
    # 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
    # 		# print("immediately abandoning window {}!".format(i))
    # 		continue

    # 	# best combination of idxs such that none are within Lmin of each other
    # 	# validRow = row[:(-length + 1)] # can't go past end of ts
    # 	# idxs = sub.optimalAlignment(validRow, Lmin)
    # 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

    # 	# order idxs by descending order of associated score
    # 	sizes = windowSims[i, idxs]
    # 	sortedSizesOrder = np.argsort(sizes)[::-1]
    # 	sortedIdxs = idxs[sortedSizesOrder]

    # 	# iteratively intersect with another near neighbor, compute the
    # 	# associated score, and check if it's better (or if we can early abandon)
    # 	intersection = windowVects[i]
    # 	numIdxs = len(sortedIdxs)
    # 	# allZeros = np.zeros(intersection.shape)
    # 	nextIntersection = np.minimum(intersection, windowVectsBlur[sortedIdxs[0]])
    # 	nextSz = np.sum(nextIntersection)
    # 	for j, idx in enumerate(sortedIdxs):
    # 		k = j + 1
    # 		intersection = np.copy(nextIntersection)
    # 		sz = nextSz
    # 		if k < numIdxs:
    # 			nextIdx = sortedIdxs[k] # since k = j+1
    # 			nextIntersection = np.minimum(intersection, windowVectsBlur[nextIdx])
    # 			nextSz = np.sum(nextIntersection) # sum -> use apodization window
    # 		else:
    # 			nextSz = sz * p0

    # 		score = (sz - nextSz) * k
    # 		if k > 1 and score > bsfScore:
    # 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 			print("sortedIdxs = {}".format(str(sortedIdxs)))
    # 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
    # 			print("------------------------")
    # 			bsfScore = score
    # 			bsfLocs = sortedIdxs[:k]
    # 			bsfIntersection = np.copy(intersection)
    # 		# early abandon if this can't possibly beat the best score, which
    # 		# is the case exactly when the intersection is so small that perfect
    # 		# matches at all future locations still wouldn't be good enough
    # 		elif sz * numIdxs <= bsfScore:
    # 			# print("early abandoning window {} at k={}".format(i, k))
    # 			break

    #
    # Version where we look for similarities to orig seq and use nearest
    # enemy dist as M0, and use mean values instead of intersection
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print("computing stuff for row {}".format(i))
        # early abandon if this location has so little stuff that no
        # intersection with it can possibly beat the best score
        if windowSims[
                i,
                i] * kMax <= bsfScore:  # highest score is kMax identical locs
            continue

        # best combination of idxs such that none are within Lmin of each other
        # validRow = row[:(-length + 1)] # can't go past end of ts
        # idxs = sub.optimalAlignment(validRow, Lmin)
        idxs = sub.optimalAlignment(row,
                                    Lmin)  # goes past end of ts, but better

        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]

        # iteratively intersect with another near neighbor, compute the
        # associated score, and check if it's better (or if we can early abandon)
        intersection = windowVects[i]
        numIdxs = len(sortedIdxs)
        nextSz = np.sum(intersection)
        nextFilt = np.array(intersection, dtype=np.float)
        nextFiltSum = np.array(nextFilt, dtype=np.float)
        for j, idx in enumerate(sortedIdxs):
            k = j + 1
            filt = np.copy(nextFilt)
            sz = nextSz
            if k < numIdxs:
                nextIdx = sortedIdxs[k]  # since k = j+1
                nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
                nextFiltSum += nextIntersection
                nextFilt = nextFiltSum / (
                    k + 1)  # avg value of each feature in intersections
                # nextSz = np.sum(nextFilt) # big even if like no intersection...
                nextSz = np.sum(nextIntersection)
                bigEnoughIntersection = nextIntersection[
                    nextIntersection > minSim]
                nextSz = np.sum(bigEnoughIntersection)
            else:
                nextSz = sz * p0

            score = (sz - nextSz) * k
            if k > 1 and score > bsfScore:
                print("window {0}, k={1}, score={2} is the new best!".format(
                    i, k, score))
                print("sortedIdxs = {}".format(str(sortedIdxs)))
                print("sortedIdxScores = {}".format(
                    str(windowSims[i, sortedIdxs])))
                print("------------------------")
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(filt)
            # early abandon if this can't possibly beat the best score, which
            # is the case exactly when the intersection is so small that perfect
            # matches at all future locations still wouldn't be good enough
            elif sz * numIdxs <= bsfScore:
                # TODO can we actually early abandon here? next window loc
                # could increase filt, and thus score for a given loc isn't
                # necessarily non-increasing...
                # 	-can't abandon using this test, but pretty sure there's
                # 	a lower bound to be had here somewhere
                # print("early abandoning window {} at k={}".format(i, k))
                break

    #
    # Version where we look for similarities to orig seq and use nearest
    # enemy dist as M0, and use mean values instead of intersection,
    # and don't sort the indices, but instead care about overlap
    #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	# early abandon if this location has so little stuff that no
    # 	# intersection with it can possibly beat the best score
    # 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
    # 		continue

    # 	# best combination of idxs such that none are within Lmin of each other
    # 	# validRow = row[:(-length + 1)] # can't go past end of ts
    # 	# idxs = sub.optimalAlignment(validRow, Lmin)
    # 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

    # 	# order idxs by descending order of associated score
    # 	sizes = windowSims[i, idxs]
    # 	sortedSizesOrder = np.argsort(sizes)[::-1]
    # 	sortedIdxs = idxs[sortedSizesOrder]

    # 	# iteratively intersect with another near neighbor, compute the
    # 	# associated score, and check if it's better (or if we can early abandon)
    # 	intersection = windowVects[i]
    # 	numIdxs = len(sortedIdxs)
    # 	nextSz = np.sum(intersection)
    # 	nextFilt = np.array(intersection, dtype=np.float)
    # 	nextFiltSum = np.array(nextFilt, dtype=np.float)
    # 	for j, idx in enumerate(sortedIdxs):
    # 		k = j + 1
    # 		filt = np.copy(nextFilt)
    # 		sz = nextSz
    # 		if k < numIdxs:
    # 			nextIdx = sortedIdxs[k] # since k = j+1
    # 			nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
    # 			nextFiltSum += nextIntersection
    # 			nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
    # 			# nextSz = np.sum(nextFilt) # big even if like no intersection...
    # 			nextSz = np.sum(nextIntersection)
    # 			bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
    # 			nextSz = np.sum(bigEnoughIntersection)
    # 		else:
    # 			nextSz = sz * p0

    # 		score = (sz - nextSz) * k
    # 		if k > 1 and score > bsfScore:
    # 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 			print("sortedIdxs = {}".format(str(sortedIdxs)))
    # 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
    # 			print("------------------------")
    # 			bsfScore = score
    # 			bsfLocs = sortedIdxs[:k]
    # 			bsfIntersection = np.copy(filt)
    # 		# early abandon if this can't possibly beat the best score, which
    # 		# is the case exactly when the intersection is so small that perfect
    # 		# matches at all future locations still wouldn't be good enough
    # 		elif sz * numIdxs <= bsfScore:
    # 			# TODO can we actually early abandon here? next window loc
    # 			# could increase filt, and thus score for a given loc isn't
    # 			# necessarily non-increasing...
    # 			# 	-can't abandon using this test, but pretty sure there's
    # 			# 	a lower bound to be had here somewhere
    # 			# print("early abandoning window {} at k={}".format(i, k))
    # 			break

    # ------------------------ recover original ts

    bsfIntersection *= bsfIntersection >= minSim
    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
    sums = np.sum(bsfIntersectionWindow, axis=0)

    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    # expectedOnesPerCol = p0 * X.shape[1]
    expectedOnesPerCol = p0 * X.shape[1] * 2
    sums -= expectedOnesPerCol

    plt.plot(sums)

    start, end, _ = maxSubarray(sums)
    patStart, patEnd = start, end + 1 + length

    # ================================ show output

    print "bestScore = {}".format(bsfScore)
    print "bestLocations = {}".format(str(bsfLocs))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowLen)

    # print bsfIntersectionWindow.shape
    # print sums.shape

    # plt.plot(sums)
    # viz.plotRect(plt.gca(), start, end + 1)
    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

    plt.tight_layout()
    plt.show()

Exemplo n.º 2

0

Exibir arquivo

Arquivo: ff8.py Projeto: yiweichung/extract

def learnFF(X, Xblur, Lmin, Lmax, length):
	"""main algorithm"""

	# ------------------------ derived stats
	kMax = int(X.shape[1] / Lmin + .5)
	# windowLen = Lmax - length + 1
	windowLen = Lmax # try matching ff10

	print "using window len ", windowLen

	p0 = np.mean(X) # fraction of entries that are 1 (roughly)
	# p0 = np.mean(X > 0.) # fraction of entries that are 1 # TODO try this
	# p0 = 2 * np.mean(X > 0.) # lambda for l0 reg based on features being bernoulli at 2 locs
	minSim = p0
	expectedOnesPerWindow = p0 * X.shape[0] * windowLen
	noiseSz = p0 * expectedOnesPerWindow # num ones to begin with
	# noiseSz *= -np.log2(p0) # TODO this is right mathematically, but what will it do?
	# noiseSz = p0 * X.shape[0] * windowLen # way too hard to beat

	colSims = np.dot(X.T, Xblur)
	filt = np.zeros((windowLen, windowLen)) + np.diag(np.ones(windowLen)) # zeros except 1s on diag
	windowSims = sig.convolve2d(colSims, filt, mode='valid')

	windowVects = vectorizeWindowLocs(X, windowLen)
	windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

	print "p0, noiseSz = ", p0, noiseSz

	# plt.figure()
	# plt.imshow(windowSims, interpolation='nearest', aspect='auto')

	# ------------------------ find stuff

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection
	#
	bsfScore = 0
	bsfLocs = None
	bsfIntersection = None
	for i, row in enumerate(windowSims):
		if i % 10 == 0:
			print("computing stuff for row {}".format(i))
		# early abandon if this location has so little stuff that no
		# intersection with it can possibly beat the best score
		if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
			continue

		# best combination of idxs such that none are within Lmin of each other
		idxs = sub.optimalAlignment(row, Lmin)

		# order idxs by descending order of associated score
		sizes = windowSims[i, idxs]
		sortedSizesOrder = np.argsort(sizes)[::-1]
		sortedIdxs = idxs[sortedSizesOrder]

		# iteratively intersect with another near neighbor, compute the
		# associated score, and check if it's better (or if we can early abandon)
		intersection = windowVects[i]
		numIdxs = len(sortedIdxs)
		nextSz = np.sum(intersection)
		nextFilt = np.array(intersection, dtype=np.float)
		nextFiltSum = np.array(nextFilt, dtype=np.float)
		for j, idx in enumerate(sortedIdxs):
			k = j + 1
			filt = np.copy(nextFilt)
			sz = nextSz
			if k < numIdxs:
				nextIdx = sortedIdxs[k] # since k = j+1
				# so we're zeroing out all the places where the filt is 0, but
				# where it isn't zero, we're not just adding the non-zeroed places
				# to the sum, but instead adding either them or the filter value
				# there, whichever is smaller; this is sort of a weird thing to
				# do. Maybe it gets us submodularity?
				# -actually, yes, this ensures that the weight of a given
				# feature is nonincreasing as locations are added
				# 	-which enables admissible early abandoning
				nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
				nextFiltSum += nextIntersection
				nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
				# nextSz = np.sum(nextFilt) # big even if like no intersection...
				# nextSz = np.sum(nextIntersection)
				bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
				nextSz = np.sum(bigEnoughIntersection)
			else:
				nextSz = sz * p0
			enemySz = max(nextSz, noiseSz)

			score = (sz - enemySz) * k
			if k > 1 and score > bsfScore:
				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
				# print("sortedIdxs = {}".format(str(sortedIdxs)))
				# print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
				print("------------------------")
				bsfScore = score
				bsfLocs = sortedIdxs[:k]
				bsfIntersection = np.copy(filt)
			# early abandon if this can't possibly beat the best score, which
			# is the case exactly when the intersection is so small that perfect
			# matches at all future locations still wouldn't be good enough
			elif sz * numIdxs <= bsfScore:
				break # TODO can we actually early abandon here?
			elif noiseSz > nextSz:
				break

	# ------------------------ recover original ts

	print "bestScore = {}".format(bsfScore)
	print "bestLocations = {}".format(str(bsfLocs))

	bsfIntersection *= bsfIntersection >= minSim
	bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
	sums = np.sum(bsfIntersectionWindow, axis=0)

	kBest = len(bsfLocs)
	expectedOnesFrac = np.power(p0, kBest)
	expectedOnesPerCol = expectedOnesFrac * X.shape[0]
	sums -= expectedOnesPerCol

	# plt.figure()
	# plt.plot(sums)

	start, end, _ = maxSubarray(sums)

	print "learnFF: startIdxs, endIdxs:"
	print np.array(bsfLocs) + start
	print np.array(bsfLocs) + end
	print "learnFF: filtLen, windowLen = {}, {}".format(end - start, windowLen)

	return bsfLocs, bsfIntersectionWindow, start, end

Exemplo n.º 3

0

Exibir arquivo

Arquivo: ff7.py Projeto: dblalock/flock

def main():
	# np.random.seed(123)

	# ================================ consts for everything
	# consts for generating data
	# n = 1000
	n = 500
	# n = 300
	# length = 8
	# length = 16
	# length = 32
	# length = 50
	# nInstances = 3
	exampleLengths = [55, 60, 65]
	# exampleLengths = [60, 60, 60]
	noiseStd = .5

	# consts for algorithm
	# Lmin = max(20, length)	# only needed for optimalAlignK() spacing
	Lmin = 20				# only needed for optimalAlignK() spacing
	Lmax = 100				# loose upper bound on pattern length
	# minSim = .5
	minSim = 0.
	length = Lmin // 2
	# length = Lmin // 4
	# length = 3

	answerIdxs = None

	USE_MSRC = True
	# USE_MSRC = False

	# ================================ data

	# ------------------------ synthetic data

	# seq = synth.randconst(n, std=noiseStd)
	seq = synth.randwalk(n, std=noiseStd)
	# seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
	# seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
	seq = embedExamples(seq, exampleLengths)
	# seq = synth.appendZeros(seq, Lmax)

	# ------------------------ msrc

	if USE_MSRC:
		from ..datasets import read_msrc as msrc
		# idxs = [0]
		# idxs = [1]
		# idxs = [2]
		# idxs = [7] # length 1500, but instances of length like 20
		# idxs = [8] # gets owned on this one cuz patterns of length like 100
		# idxs = [9] # missing an annotation, it appears
		idxs = [10] # something crazy about feature rep here # TODO fix
		# idxs = [11] # crap cuz bad, low-variance signals
		# idxs = [12] # has garbagey sections like [10]
		# idxs = [13] # empty feature mat # TODO
		# idxs = [14]
		downsampleBy = 2
		# downsampleBy = 1
		recordings = msrc.getRecordings(idxs=idxs)
		r = list(recordings)[0]
		# seq = r.data
		# seq = r.data[:, :40]
		# seq = r.data[:, 20:23]
		seq = r.data[:, 24:27]
		# seq = r.data[:, 20:27]
		print "orig seq shape", seq.shape
		seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
		print "downsampled seq shape", seq.shape
		# length = max(8, Lmin / 2)
		Lmin = len(seq) // 20
		# Lmax = len(seq) // 8
		Lmax = len(seq) // 10
		length = Lmin // 2
		# Lmax = len(seq) / 20
		# k0 = 10
		# minSim = .5
		answerIdxs = r.gestureIdxs / downsampleBy
		# print "seq shape", seq.shape
		prePadLen = Lmax - length
		# postPadLen = length - 1
		postPadLen = Lmax - length
		first = np.tile(seq[0], (prePadLen, 1))
		last = np.tile(seq[-1], (postPadLen, 1))
		seq = np.vstack((first, seq, last)) # pad with fixed val to allow all window positions
		# ^ TODO pad simMat with zeros instead--this introduces fake subseqs
		answerIdxs += prePadLen
		# seq = np.vstack((seq, np.tile(flat, (length-1, 1)))) # lets it get the last rep
		# print "seq shape", seq.shape

	# ================================ feature construction

	logMaxLength = int(np.floor(np.log2(Lmax)))
	# logMaxLength = int(np.ceil(np.log2(Lmax)))
	# logMinLength = 3 # -> length 8
	# logMinLength = 4 # -> length 16
	logMinLength = int(np.floor(np.log2(Lmin)))
	lengths = np.arange(logMinLength, logMaxLength + 1)
	lengths = 2 ** lengths
	# lengths = [16]

	cardinality = 8
	breakpoints = rep.saxBreakpoints(cardinality)

	X = rep.multiNormalizeAndSparseQuantize(seq, lengths, breakpoints)
	# X = rep.multiSparseLineProject(seq, lengths, breakpoints, removeZeroRows=False)

	# lengths2 = np.arange(3, logMaxLength + 1)
	# lengths2 = 2 ** lengths2
	lengths2 = lengths # TODO uncomment after debug
	# lengths2 = [8, 32]

	# breakpoints2 = rep.defaultSparseLineBreakpoints(seq, scaleHowMany=2)
	breakpoints2 = rep.defaultSparseLineBreakpoints(seq)
	X2 = rep.multiSparseLineProject(seq, lengths2, breakpoints2)
	# X2 = X2 > minSim
	X2 = X2 > 0. # ignore correlations

	# print "shapes:"
	# print X.shape
	# print X2.shape

	X = np.vstack((X, X2))

	# plt.figure()
	# # viz.imshowBetter(X)
	# viz.imshowBetter(X2)
	# plt.figure()
	# viz.imshowBetter(X2 > 0.)
	# plt.show()

	# print seq.shape
	# plt.figure()

	# plt.plot(seq[:,0]) # bit of pattern, but only varies between -.4 and .2

	# okay, so 1st dim is all zeros
	# variances = rep.slidingVariance(seq, 8)
	# for dim in range(len(variances)):
	# 	plt.figure()
	# 	plt.plot(variances[dim].flatten())

	# print variances.shape
	# variances = rep.vstack3Tensor(variances.T)
	# print variances.shape
	# plt.plot(variances)

	# plt.show()
	# return

	X = localMaxFilterSimMat(X)
	# Xbool = np.copy(X)
	featureMeans = np.mean(X, axis=1).reshape((-1, 1))
	# print featureMeans
	X *= -np.log2(featureMeans) # variable encoding costs for rows
	# X /= -np.log(featureMeans)
	# Xblur = localMaxFilterSimMat(X) # try only maxFiltering Xblur
	Xblur = filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')

	# plt.figure()
	# viz.imshowBetter(X)
	# plt.figure()
	# viz.imshowBetter(Xblur)

	print "featureMat dims:", X.shape
	Xnonzeros = np.count_nonzero(X)
	print "featureMat nonzeros, total, frac = ", Xnonzeros, X.size, Xnonzeros / float(X.size)

	# plt.show()
	# return

	# ================================ plotting crap

	plt.figure()
	axSeq = plt.subplot2grid((4,1), (0,0))
	axSim = plt.subplot2grid((4,1), (1,0), rowspan=3)
	for ax in (axSeq, axSim):
		ax.autoscale(tight=True)
	axSeq.plot(seq)
	# if answerIdxs is not None:
	# 	for idx in answerIdxs:
	# 		viz.plotVertLine(idx, ax=axSeq)
	padLen = len(seq) - X.shape[1]
	Xpad = synth.appendZeros(X, padLen)
	axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
	# im = axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
	# plt.colorbar(im, cax=axSim)

	axSeq.set_title("Time Series")
	axSim.set_title("Feature Matrix")

	# plt.show()
	# return

	# ================================ science

	# ------------------------ derived stats
	kMax = int(X.shape[1] / Lmin + .5)
	windowLen = Lmax - length + 1

	p0 = np.mean(X) # fraction of entries that are 1 (roughly)
	# p0 = 2 * np.mean(X) # lambda for l0 reg based on features being bernoulli at 2 locs
	minSim = p0
	# p0 = -np.log(np.mean(Xbool)) # fraction of entries that are 1 (roughly)
	# noiseSz = p0 * X.shape[0] * windowLen # way too hard to beat
	expectedOnesPerWindow = p0 * X.shape[0] * windowLen
	noiseSz = p0 * expectedOnesPerWindow # num ones to begin with

	# intersections = computeIntersections(X, windowLen)
	# windowSims = np.sum(intersections, axis=2)
	# colSims = np.dot(X.T, X)
	colSims = np.dot(X.T, Xblur)
	filt = np.zeros((windowLen, windowLen)) + np.diag(np.ones(windowLen)) # zeros except 1s on diag
	windowSims = sig.convolve2d(colSims, filt, mode='valid')

	windowVects = vectorizeWindowLocs(X, windowLen)
	windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

	# plt.figure()
	# plt.imshow(windowSims, interpolation='nearest', aspect='auto')

	# ------------------------ find stuff

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection
	#
	bsfScore = 0
	bsfLocs = None
	bsfIntersection = None
	for i, row in enumerate(windowSims):
		if i % 20 == 0:
			print("computing stuff for row {}".format(i))
		# early abandon if this location has so little stuff that no
		# intersection with it can possibly beat the best score
		if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
			continue

		# best combination of idxs such that none are within Lmin of each other
		# validRow = row[:(-length + 1)] # can't go past end of ts
		# idxs = sub.optimalAlignment(validRow, Lmin)
		idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

		# order idxs by descending order of associated score
		sizes = windowSims[i, idxs]
		sortedSizesOrder = np.argsort(sizes)[::-1]
		sortedIdxs = idxs[sortedSizesOrder]

		# iteratively intersect with another near neighbor, compute the
		# associated score, and check if it's better (or if we can early abandon)
		intersection = windowVects[i]
		numIdxs = len(sortedIdxs)
		nextSz = np.sum(intersection)
		nextFilt = np.array(intersection, dtype=np.float)
		nextFiltSum = np.array(nextFilt, dtype=np.float)
		for j, idx in enumerate(sortedIdxs):
			k = j + 1
			filt = np.copy(nextFilt)
			sz = nextSz
			if k < numIdxs:
				nextIdx = sortedIdxs[k] # since k = j+1
				nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
				nextFiltSum += nextIntersection
				nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
				# nextSz = np.sum(nextFilt) # big even if like no intersection...
				nextSz = np.sum(nextIntersection)
				bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
				nextSz = np.sum(bigEnoughIntersection)
			else:
				nextSz = sz * p0
				# nextSz = -1
			enemySz = max(nextSz, noiseSz)

			score = (sz - enemySz) * k
			if k > 1 and score > bsfScore:
				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
				print("sortedIdxs = {}".format(str(sortedIdxs)))
				print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
				print("------------------------")
				bsfScore = score
				bsfLocs = sortedIdxs[:k]
				bsfIntersection = np.copy(filt)
			# early abandon if this can't possibly beat the best score, which
			# is the case exactly when the intersection is so small that perfect
			# matches at all future locations still wouldn't be good enough
			elif sz * numIdxs <= bsfScore:
				# TODO can we actually early abandon here? next window loc
				# could increase filt, and thus score for a given loc isn't
				# necessarily non-increasing...
				# 	-can't abandon using this test, but pretty sure there's
				# 	a lower bound to be had here somewhere
				# print("early abandoning window {} at k={}".format(i, k))
				break
			elif noiseSz > nextSz:
				break

	# #
	# # Version where we look for similarities to orig seq and use nearest
	# # enemy dist as M0, and use mean values instead of intersection,
	# # and don't sort the indices, but instead care about overlap
	# #
	# bsfScore = 0
	# bsfLocs = None
	# bsfIntersection = None
	# for i, row in enumerate(windowSims):
	# 	if i % 20 == 0:
	# 		print("computing stuff for row {}".format(i))
	# 	# early abandon if this location has so little stuff that no
	# 	# intersection with it can possibly beat the best score
	# 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
	# 		continue

	# 	# best combination of idxs such that none are within Lmin of each other
	# 	# validRow = row[:(-length + 1)] # can't go past end of ts
	# 	# idxs = sub.optimalAlignment(validRow, Lmin)
	# 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

	# 	# order idxs by descending order of associated score
	# 	sizes = windowSims[i, idxs]
	# 	sortedSizesOrder = np.argsort(sizes)[::-1]
	# 	sortedIdxs = idxs[sortedSizesOrder]

	# 	# iteratively intersect with another near neighbor, compute the
	# 	# associated score, and check if it's better (or if we can early abandon)
	# 	intersection = windowVects[i]
	# 	numIdxs = len(sortedIdxs)
	# 	nextSz = np.sum(intersection)
	# 	nextFilt = np.array(intersection, dtype=np.float)
	# 	nextFiltSum = np.array(nextFilt, dtype=np.float)
	# 	for j, idx in enumerate(sortedIdxs):
	# 		k = j + 1
	# 		filt = np.copy(nextFilt)
	# 		sz = nextSz
	# 		if k < numIdxs:
	# 			nextIdx = sortedIdxs[k] # since k = j+1
	# 			nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
	# 			nextFiltSum += nextIntersection
	# 			nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
	# 			# nextSz = np.sum(nextFilt) # big even if like no intersection...
	# 			nextSz = np.sum(nextIntersection)
	# 			bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
	# 			nextSz = np.sum(bigEnoughIntersection)
	# 		else:
	# 			nextSz = sz * p0

	# 		score = (sz - nextSz) * k
	# 		if k > 1 and score > bsfScore:
	# 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
	# 			print("sortedIdxs = {}".format(str(sortedIdxs)))
	# 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
	# 			print("------------------------")
	# 			bsfScore = score
	# 			bsfLocs = sortedIdxs[:k]
	# 			bsfIntersection = np.copy(filt)
	# 		# early abandon if this can't possibly beat the best score, which
	# 		# is the case exactly when the intersection is so small that perfect
	# 		# matches at all future locations still wouldn't be good enough
	# 		elif sz * numIdxs <= bsfScore:
	# 			# TODO can we actually early abandon here? next window loc
	# 			# could increase filt, and thus score for a given loc isn't
	# 			# necessarily non-increasing...
	# 			# 	-can't abandon using this test, but pretty sure there's
	# 			# 	a lower bound to be had here somewhere
	# 			# print("early abandoning window {} at k={}".format(i, k))
	# 			break

	# ------------------------ recover original ts

	bsfIntersection *= bsfIntersection >= minSim
	bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
	sums = np.sum(bsfIntersectionWindow, axis=0)

	kBest = len(bsfLocs)
	p0 = np.power(p0, kBest)
	# expectedOnesPerCol = p0 * X.shape[1] * 2
	# expectedOnesPerCol = p0 * X.shape[1]
	expectedOnesPerCol = p0 * X.shape[0]
	sums -= expectedOnesPerCol

	# plt.figure()
	# plt.plot(sums)

	start, end, _ = maxSubarray(sums)
	# patStart, patEnd = start, end + 1 + length
	patStart, patEnd = start, end + 1
	# patStart, patEnd = start + length // 2, end + 1 + length

	# ================================ show output

	print "bestScore = {}".format(bsfScore)
	print "bestLocations = {}".format(str(bsfLocs))

	for idx in bsfLocs:
		viz.plotRect(axSim, idx, idx+windowLen)

	# print bsfIntersectionWindow.shape
	# print sums.shape

	# plt.plot(sums)
	# viz.plotRect(plt.gca(), start, end + 1)
	for idx in bsfLocs:
		viz.plotRect(axSeq, idx + patStart, idx + patEnd)

	if answerIdxs is not None:
		for idx in answerIdxs:
			viz.plotVertLine(idx, ax=axSeq)

	plt.figure()
	plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

	plt.tight_layout()
	plt.show()

Exemplo n.º 4

0

Exibir arquivo

Arquivo: ff10.py Projeto: yiweichung/extract

def extractTrueLocs(X,
                    Xblur,
                    bsfLocs,
                    bsfFilt,
                    windowLen,
                    Lmin,
                    Lmax,
                    extractTrueLocsAlgo=DEFAULT_EXTRACT_LOCS_ALGO,
                    **sink):

    if extractTrueLocsAlgo == 'none':
        return bsfLocs, bsfLocs + Lmax

    # determine expected value of an element of X (or, alternatively, Xblur)
    if extractTrueLocsAlgo == 'x':
        p0 = np.mean(X)
    else:
        p0 = np.mean(Xblur)

    if bsfFilt is None:
        print "WARNING: extractTrueLocs(): received None as filter"
        return np.array([0]), np.array([1])

    print "extractTrueLocs(): bsf locs", bsfLocs
    print "extractTrueLocs(): bsfFilt shape", bsfFilt.shape

    # compute the total filter weight in each column, ignoring low values
    bsfFiltWindow = np.copy(bsfFilt)
    # minSim = p0
    # bsfFiltWindow *= bsfFiltWindow >= minSim
    sums = np.sum(bsfFiltWindow, axis=0)

    # subtract off the amount of weight that we'd expect in each column by chance
    kBest = len(bsfLocs)
    expectedOnesFrac = np.power(p0, kBest -
                                1)  # this is like 0; basically no point
    expectedOnesPerCol = expectedOnesFrac * X.shape[0]
    sums -= expectedOnesPerCol

    # # at least for a couple msrc examples, these are basically flat--which makes sense
    # plt.figure()
    # plt.plot(sums)
    # plt.plot(np.zeros(len(sums)) + expectedOnesPerCol)
    # # from ..utils.misc import nowAsString
    # # plt.savefig('/Users/davis/Desktop/ts/figs/msrc/sums-{}.pdf'.format(nowAsString()))
    # plt.show()
    # plt.close()

    # pick the optimal set of indices to maximize the sum of sequential column sums
    start, end, _ = maxSubarray(sums)

    # ensure we picked at least Lmin points
    sumsLength = len(sums)
    while end - start < Lmin:
        nextStartVal = sums[start - 1] if start > 0 else -np.inf
        nextEndVal = sums[end] if end < sumsLength else -np.inf
        if nextStartVal > nextEndVal:
            start -= 1
        else:
            end += 1
    # ensure we picked at most Lmax points
    while end - start > Lmax:
        if sums[start] > sums[end - 1]:
            end -= 1
        else:
            start += 1

    locs = np.sort(np.asarray(bsfLocs))
    startIdxs = locs + start
    endIdxs = locs + end

    # # reconcile overlap; we first figure out how much we like the start vs end
    # # for different amounts of overlap
    # startSums = np.cumsum(sums)
    # endSums = np.cumsum(sums[::-1])
    # # gaps = startIdxs[1:] - startIdxs[:-1]
    # for i in range(len(startIdxs) - 1):
    # 	te1, ts2 = endIdxs[i], startIdxs[i+1]
    # 	gap = ts2 - te1
    # 	if gap > 0:
    # 		continue

    # 	# figure out best amount by which to crop start and end indices
    # 	overlap = -gap + 1
    # 	bestSplitCost = np.inf
    # 	bestMoveStart = -1
    # 	for moveStartThisMuch in range(0, overlap):
    # 		moveEndThisMuch = overlap - moveStartThisMuch
    # 		startCost = startSums[moveStartThisMuch-1] if moveStartThisMuch else 0.
    # 		endCost = endSums[moveEndThisMuch-1] if moveEndThisMuch else 0.
    # 		cost = startCost + endCost
    # 		if cost < bestSplitCost:
    # 			bestSplitCost = cost
    # 			bestMoveStart = moveStartThisMuch

    # 	startIdxs[i+1] += bestMoveStart
    # 	endIdxs[i] -= (overlap - bestMoveStart)

    if len(startIdxs) > 2:
        lengths = endIdxs - startIdxs
        maxInternalLength = np.max(lengths[1:-1])
        startIdxs[0] = max(startIdxs[0], endIdxs[0] - maxInternalLength)
        endIdxs[-1] = min(endIdxs[-1], startIdxs[-1] + maxInternalLength)

    print "extractTrueLocs(): startIdxs, endIdxs", startIdxs, endIdxs

    return startIdxs, endIdxs

Exemplo n.º 5

0

Exibir arquivo

Arquivo: ff10.py Projeto: dblalock/flock

def extractTrueLocs(X, Xblur, bsfLocs, bsfFilt, windowLen, Lmin, Lmax,
	extractTrueLocsAlgo=DEFAULT_EXTRACT_LOCS_ALGO, **sink):

	if extractTrueLocsAlgo == 'none':
		return bsfLocs, bsfLocs + Lmax

	# determine expected value of an element of X (or, alternatively, Xblur)
	if extractTrueLocsAlgo == 'x':
		p0 = np.mean(X)
	else:
		p0 = np.mean(Xblur)

	if bsfFilt is None:
		print "WARNING: extractTrueLocs(): received None as filter"
		return np.array([0]), np.array([1])

	print "extractTrueLocs(): bsf locs", bsfLocs
	print "extractTrueLocs(): bsfFilt shape", bsfFilt.shape

	# compute the total filter weight in each column, ignoring low values
	bsfFiltWindow = np.copy(bsfFilt)
	# minSim = p0
	# bsfFiltWindow *= bsfFiltWindow >= minSim
	sums = np.sum(bsfFiltWindow, axis=0)

	# subtract off the amount of weight that we'd expect in each column by chance
	kBest = len(bsfLocs)
	expectedOnesFrac = np.power(p0, kBest-1) # this is like 0; basically no point
	expectedOnesPerCol = expectedOnesFrac * X.shape[0]
	sums -= expectedOnesPerCol

	# # at least for a couple msrc examples, these are basically flat--which makes sense
	# plt.figure()
	# plt.plot(sums)
	# plt.plot(np.zeros(len(sums)) + expectedOnesPerCol)
	# # from ..utils.misc import nowAsString
	# # plt.savefig('/Users/davis/Desktop/ts/figs/msrc/sums-{}.pdf'.format(nowAsString()))
	# plt.show()
	# plt.close()

	# pick the optimal set of indices to maximize the sum of sequential column sums
	start, end, _ = maxSubarray(sums)

	# ensure we picked at least Lmin points
	sumsLength = len(sums)
	while end - start < Lmin:
		nextStartVal = sums[start-1] if start > 0 else -np.inf
		nextEndVal = sums[end] if end < sumsLength else -np.inf
		if nextStartVal > nextEndVal:
			start -= 1
		else:
			end += 1
	# ensure we picked at most Lmax points
	while end - start > Lmax:
		if sums[start] > sums[end-1]:
			end -= 1
		else:
			start += 1

	locs = np.sort(np.asarray(bsfLocs))
	startIdxs = locs + start
	endIdxs = locs + end

	# # reconcile overlap; we first figure out how much we like the start vs end
	# # for different amounts of overlap
	# startSums = np.cumsum(sums)
	# endSums = np.cumsum(sums[::-1])
	# # gaps = startIdxs[1:] - startIdxs[:-1]
	# for i in range(len(startIdxs) - 1):
	# 	te1, ts2 = endIdxs[i], startIdxs[i+1]
	# 	gap = ts2 - te1
	# 	if gap > 0:
	# 		continue

	# 	# figure out best amount by which to crop start and end indices
	# 	overlap = -gap + 1
	# 	bestSplitCost = np.inf
	# 	bestMoveStart = -1
	# 	for moveStartThisMuch in range(0, overlap):
	# 		moveEndThisMuch = overlap - moveStartThisMuch
	# 		startCost = startSums[moveStartThisMuch-1] if moveStartThisMuch else 0.
	# 		endCost = endSums[moveEndThisMuch-1] if moveEndThisMuch else 0.
	# 		cost = startCost + endCost
	# 		if cost < bestSplitCost:
	# 			bestSplitCost = cost
	# 			bestMoveStart = moveStartThisMuch

	# 	startIdxs[i+1] += bestMoveStart
	# 	endIdxs[i] -= (overlap - bestMoveStart)

	if len(startIdxs) > 2:
		lengths = endIdxs - startIdxs
		maxInternalLength = np.max(lengths[1:-1])
		startIdxs[0] = max(startIdxs[0], endIdxs[0] - maxInternalLength)
		endIdxs[-1] = min(endIdxs[-1], startIdxs[-1] + maxInternalLength)

	print "extractTrueLocs(): startIdxs, endIdxs", startIdxs, endIdxs

	return startIdxs, endIdxs

Exemplo n.º 6

0

Exibir arquivo

Arquivo: ff5.py Projeto: dblalock/flock

def main():
    # np.random.seed(123)

    # ================================ consts for everything
    # consts for generating data
    # n = 1000
    n = 500
    # n = 300
    # length = 8
    # length = 16
    length = 32
    # length = 50
    # nInstances = 3
    exampleLengths = [55, 60, 65]
    # exampleLengths = [60, 60, 60]
    noiseStd = 0.5

    # consts for algorithm
    Lmin = max(20, length)  # only needed for optimalAlignK() spacing
    Lmax = 100  # loose upper bound on pattern length
    minSim = 0.8  # loose cutoff for what counts as similar

    k0 = len(exampleLengths)  # for version where we tell it k

    # ------------------------ synthetic data

    # seq = synth.randconst(n, std=noiseStd)
    # seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
    seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
    seq = embedExamples(seq, exampleLengths)

    # ------------------------ msrc

    # from ..datasets import read_msrc as msrc
    # idxs = [2]
    # recordings = msrc.getRecordings(idxs=idxs)
    # r = list(recordings)[0]
    # seq = r.data[:, 20:23]
    # print "orig seq shape", seq.shape
    # seq = ar.downsampleMat(seq, rowsBy=10)
    # print "downsampled seq shape", seq.shape
    # length = 8
    # Lmin = len(seq) / 20
    # Lmax = len(seq) / 10
    # # Lmax = len(seq) / 20
    # k0 = 10
    # minSim = .5

    # noise = synth.randconst(seq.shape) # add noise for debugging
    # seq = np.r_[noise, seq, noise]

    # ================================ simMat

    X = computeSimMat(seq, length)
    # X[X < minSim] = 0.

    # X = ff2.localMaxFilterSimMat(X)
    # maxPoolWidth = min(length-1, Lmin-1)
    # maxPoolWidth /= 2
    # X = filters.maximum_filter1d(X, maxPoolWidth, axis=1)
    # X = filters.maximum_filter1d(X, length-1, axis=1)
    # X = filters.maximum_filter1d(X, length/2, axis=1)
    # X = filters.maximum_filter1d(X, 3, axis=1)
    # X = np.array(X > minSim, dtype=np.float)
    # X = X > minSim
    X[X < minSim] = 0.0
    # X = ff2.filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')

    # X = sub.removeCorrelatedRows(X, .9, accumulate=True) # correlation > .9 -> kill it
    # X = sub.removeCorrelatedRows(X, .9, accumulate=False) # correlation > .9 -> kill it

    print "simMat dims:", X.shape
    print "simMat nonzeros, total, frac = ", np.count_nonzero(X), X.size, np.count_nonzero(X) / float(X.size)

    # ================================ plotting crap

    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    axSim.imshow(synth.appendZeros(X, length - 1), interpolation="nearest", aspect="auto")
    # im = axSim.imshow(synth.appendZeros(X, length-1), interpolation='nearest', aspect='auto')
    # plt.colorbar(im, cax=axSim)

    axSeq.set_title("Time Series")
    axSim.set_title("Similarities Matrix")

    # ================================ science

    # ------------------------ derived stats
    kMax = int(X.shape[1] / Lmin + 0.5)
    windowWidth = Lmax - length + 1
    # windowShape = (X.shape[0], Lmax)
    # windowSize = np.prod(windowShape)
    nLocs = X.shape[1] - windowWidth + 1

    # ------------------------ pairwise sims
    # colSims = np.dot(X.T, X)
    # filt = np.zeros((Lmax, Lmax)) + np.diag(np.ones(Lmax)) # zeros except 1s on diag
    # windowSims = sig.convolve2d(colSims, filt, mode='valid')

    print "computing intersections..."

    windowVects = vectorizeWindowLocs(X, windowWidth)
    windowSz = windowVects.shape[1]

    intersections = computeIntersections(X, windowVects, windowWidth)
    windowSims = np.sum(intersections, axis=2)
    # windowSims /= windowSz

    # assert(np.array_equal(windowSims, windowSims2)) # works

    # plt.figure()
    # plt.imshow(windowSims2)
    plt.figure()
    plt.imshow(windowSims / windowSz)
    plt.colorbar()

    # plt.show()
    # return

    print "computing similarity lower bound..."

    # TODO maybe try introducing beta prior to weight different values of k
    # 	-or, alternatively, see what happens if we tell it the right k

    #
    # Version where we look for stuff matching each intersection
    #

    # initialize with closest pair at least Lmin apart
    nonTrivialWindowSims = np.triu(windowSims)  # zero lower half
    for i in range(nLocs):
        nonTrivialWindowSims[i, i : min(nLocs, i + Lmin)] = 0  # zero Lmin past diag
    highestSimIdx = np.argmax(nonTrivialWindowSims)

    bsfLocs = sorted([highestSimIdx // nLocs, highestSimIdx % nLocs])
    bsfScore = windowSims[tuple(bsfLocs)] * 2  # list will yield a list
    bsfIntersection = intersections[bsfLocs]

    print "finding best locations..."

    # rowIntersectionSims = np.zeros(nLocs)
    # for i in range(nLocs):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	bestPossibleScores = windowSims[i,min(nLocs,i+Lmin):] * kMax / 2.
    # 	candidateIdxs = np.where(bestPossibleScores > bsfScore)[0]
    # 	# print "candidateIdxs shape", candidateIdxs.shape
    # 	for j in candidateIdxs:
    # 		intersection = intersections[i, j]
    # 		rowIntersectionSims *= 0
    # 		rowIntersectionSims[candidateIdxs] = np.dot(intersections[i, candidateIdxs], intersection)
    # 		idxs = sub.optimalAlignment(rowIntersectionSims, Lmin)

    # 		# order idxs by descending order of associated score
    # 		sizes = rowIntersectionSims[idxs]
    # 		sortedSizesOrder = np.argsort(sizes)[::-1]
    # 		sortedIdxs = idxs[sortedSizesOrder]

    # 		# iteratively intersect with another near neighbor, compute the
    # 		# associated score, and check if it's better (or if we can early abandon)
    # 		numIdxs = len(sortedIdxs)
    # 		k = 2
    # 		for idx in sortedIdxs[2:]: # first 2 are no better than orig intersection
    # 			k += 1
    # 			intersection = np.logical_and(intersection, intersections[i, idx])
    # 			sz = np.sum(intersection)
    # 			score = sz * k
    # 			if score > bsfScore:
    # 				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 				bsfScore = score
    # 				bsfLocs = sortedIdxs[:k]
    # 				bsfIntersection = np.copy(intersection)
    # 			elif sz * numIdxs <= bsfScore:
    # 				# print("early abandoning window {} at k={}".format(i, k))
    # 				break

    #
    # Version where we look for similarities to orig seq
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print ("computing stuff for row {}".format(i))
            # early abandon if this location has so little stuff that no
            # intersection with it can possibly beat the best score
        if windowSims[i, i] * kMax <= bsfScore:  # highest score is kMax identical locs
            # print("immediately abandoning window {}!".format(i))
            continue
            # print("not abandoning window {}!".format(i))
            # best combination of idxs such that none are within Lmin of each other
        idxs = sub.optimalAlignment(row, Lmin)
        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]
        # retrieve intersection and compute score for best 2 locs
        k = 2
        intersection = intersections[sortedIdxs[0], sortedIdxs[1]]
        score = windowSims[sortedIdxs[0], sortedIdxs[1]] * k
        # possibly update best-so-far score and window locations
        if score > bsfScore:
            bsfScore = score
            bsfLocs = sortedIdxs[:k]
            bsfIntersection = np.copy(intersection)
            # iteratively intersect with another near neighbor, compute the
            # associated score, and check if it's better (or if we can early abandon)
        numIdxs = len(sortedIdxs)
        for idx in sortedIdxs[2:]:
            k += 1
            intersection = np.logical_and(intersection, windowVects[idx])
            sz = np.count_nonzero(intersection)
            score = sz * k
            if score > bsfScore:
                print ("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(intersection)
                # early abandon if this can't possibly beat the best score, which
                # is the case exactly when the intersection is so small that perfect
                # matches at all future locations still wouldn't be good enough
            elif sz * numIdxs <= bsfScore:
                # print("early abandoning window {} at k={}".format(i, k))
                break

                #
                # Version where we we tell it k
                #
                # bsfScore = 0
                # bsfLocs = None
                # bsfIntersection = None
                # # selfSims = np.diagonal(windowSims)
                # # candidateRowIdxs = np.where(selfSims * k0 <= bsfScore)[0]
                # # for i in candidateRowIdxs:
                # # 	row = windowSims[i]
                # for i, row in enumerate(windowSims):
                # 	if i % 20 == 0:
                # 		print("computing stuff for row {}".format(i))
                # 	if windowSims[i,i] * k0 <= bsfScore:
                # 		continue
                # 	idxs = sub.optimalAlignK(row, Lmin, k0)
                # 	intersection = intersections[idxs[0], idxs[1]]
                # 	sz = 0
                # 	for idx in idxs[2:]:
                # 		intersection = np.logical_and(intersection, windowVects[idx])
                # 		sz = np.count_nonzero(intersection)
                # 		if sz * k0 <= bsfScore:
                # 			break
                # 	score = sz * k0
                # 	if score > bsfScore:
                # 		print("window {0}, k={1}, score={2} is the new best!".format(i, k0, score))
                # 		bsfScore = score
                # 		bsfLocs = idxs
                # 		bsfIntersection = np.copy(intersection)

                # ================================ show output

    print "bestScore = {}".format(bsfScore)
    print "bestLocations = {}".format(str(bsfLocs))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowWidth)

    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowWidth))
    sums = np.sum(bsfIntersectionWindow, axis=0)
    print bsfIntersectionWindow.shape
    print sums.shape

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation="nearest", aspect="auto")
    plt.colorbar()

    plt.figure()
    plt.plot(sums)

    p0 = np.mean(X)
    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    # expectedOnesPerCol = p0 * X.shape[1]
    expectedOnesPerCol = p0 * X.shape[1] * 2
    sums -= expectedOnesPerCol

    plt.plot(sums)

    start, end, _ = maxSubarray(sums)
    patStart, patEnd = start, end + 1 + length
    viz.plotRect(plt.gca(), start, end + 1)
    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

        # plt.figure()
        # # windowSims[bestRowIdx, bestColIdxs] *= 10 # color these differently
        # # plt.imshow(windowSims, interpolation='none')
        # plt.imshow(windowSims)
        # plt.colorbar()

        # # for col in range(colSims.shape[1]):
        # bestRowIdx = -1
        # bestColIdxs = []
        # bestSum = -1
        # kVals = np.arange(2,kMax)
        # for i, row in enumerate(windowSims):
        # 	optimalIdxs = sub.optimalAlignK(row, Lmin, kVals)
        # 	if not len(optimalIdxs):
        # 		continue
        # 	# print "optimalIdxs", optimalIdxs
        # 	sums = map(lambda idxs: np.sum(row[idxs]), optimalIdxs)
        # 	# print "sums", sums
        # 	sums = np.asarray(sums)
        # 	bestSumIdx = np.argmax(sums)
        # 	if sums[bestSumIdx] > bestSum:
        # 		bestRowIdx = i
        # 		bestColIdxs = optimalIdxs[bestSumIdx]
        # 		bestSum = sums[bestSumIdx]

        # print "bestRow = ", bestRowIdx
        # print "best end locs = ", bestColIdxs

    plt.tight_layout()
    plt.show()

Exemplo n.º 7

0

Exibir arquivo

Arquivo: ff6.py Projeto: icdm-extract/extract

def main():
	# np.random.seed(123)

	# ================================ consts for everything
	# consts for generating data
	# n = 1000
	n = 500
	# n = 300
	# length = 8
	# length = 16
	length = 32
	# length = 50
	# nInstances = 3
	exampleLengths = [55, 60, 65]
	# exampleLengths = [60, 60, 60]
	noiseStd = .5

	# consts for algorithm
	Lmin = max(20, length)	# only needed for optimalAlignK() spacing
	Lmax = 100				# loose upper bound on pattern length
	minSim = .5				# loose cutoff for what counts as similar

	# k0 = len(exampleLengths) # for version where we tell it k
	answerIdxs = None

	# ------------------------ synthetic data

	# seq = synth.randconst(n, std=noiseStd)
	# seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
	seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
	seq = embedExamples(seq, exampleLengths)

	# ------------------------ msrc

	from ..datasets import read_msrc as msrc
	idxs = [2]
	# idxs = [0]
	downsampleBy = 2
	recordings = msrc.getRecordings(idxs=idxs)
	r = list(recordings)[0]
	# seq = r.data
	# seq = r.data[:, :40]
	# seq = r.data[:, 20:23]
	seq = r.data[:, 24:27]
	# seq = r.data[:, 20:27]
	print "orig seq shape", seq.shape
	seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
	print "downsampled seq shape", seq.shape
	length = max(8, Lmin / 2)
	Lmin = len(seq) / 20
	Lmax = len(seq) / 10
	# Lmax = len(seq) / 20
	# k0 = 10
	minSim = .5
	answerIdxs = r.gestureIdxs / downsampleBy
	# print "seq shape", seq.shape
	prePadLen = Lmax - length
	postPadLen = length - 1
	first = np.tile(seq[0], (prePadLen, 1))
	last = np.tile(seq[-1], (postPadLen, 1))
	seq = np.vstack((first, seq, last)) # pad with fixed val to allow all window positions
	# ^ TODO pad simMat with zeros instead--this introduces fake subseqs
	answerIdxs += prePadLen
	# seq = np.vstack((seq, np.tile(flat, (length-1, 1)))) # lets it get the last rep
	# print "seq shape", seq.shape


	# r.plot()

	# plt.figure()
	# plt.plot(r.sampleTimes)

	# answerIdxs = r.gestureIdxs / downsampleBy
	# print r.gestureIdxs
	# print answerIdxs

	# plt.figure()
	# plt.plot(seq)
	# for idx in answerIdxs:
	# 	ax = plt.gca()
	# 	viz.plotVertLine(idx, ax=ax)
	# plt.show()

	# return

	# noise = synth.randconst(seq.shape) # add noise for debugging
	# seq = np.r_[noise, seq, noise]

	# ================================ simMat

	X = computeSimMat(seq, length)
	X[X < minSim] = 0.
	# Xorig = np.copy(X)
	X = ff2.localMaxFilterSimMat(X)
	Xblur = ff2.filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')
	# Xblur = ff2.filterSimMat(X, Lmin-1, 'hamming', scaleFilterMethod='max1')
	Xblur = np.minimum(Xblur, 1.)

	print "simMat dims:", X.shape
	Xnonzeros = np.count_nonzero(X)
	print "simMat nonzeros, total, frac = ", Xnonzeros, X.size, Xnonzeros / float(X.size)

	# ================================ plotting crap

	plt.figure()
	axSeq = plt.subplot2grid((4,1), (0,0))
	axSim = plt.subplot2grid((4,1), (1,0), rowspan=3)
	for ax in (axSeq, axSim):
		ax.autoscale(tight=True)
	axSeq.plot(seq)
	if answerIdxs is not None:
		for idx in answerIdxs:
			viz.plotVertLine(idx, ax=axSeq)
	Xpad = synth.appendZeros(X, length-1)
	axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
	# im = axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
	# plt.colorbar(im, cax=axSim)

	axSeq.set_title("Time Series")
	axSim.set_title("Similarities Matrix")

	# plt.figure()
	# plt.imshow(Xorig, interpolation='nearest', aspect='auto')
	# plt.colorbar()

	# plt.figure()
	# plt.imshow(X, interpolation='nearest', aspect='auto')
	# plt.colorbar()

	# # plt.figure()
	# # Xfilt = ff2.localMaxFilterSimMat(X, allowEq=True)
	# # plt.imshow(Xfilt, interpolation='nearest', aspect='auto')
	# # plt.colorbar()

	# # plt.figure()
	# # Xfilt = ff2.localMaxFilterSimMat(X, allowEq=False)
	# # plt.imshow(Xfilt, interpolation='nearest', aspect='auto')
	# # plt.colorbar()

	# plt.figure()
	# plt.imshow(Xblur, interpolation='nearest', aspect='auto')
	# plt.colorbar()

	# plt.show()
	# return

	# ================================ science

	# ------------------------ derived stats
	kMax = int(X.shape[1] / Lmin + .5)
	windowLen = Lmax - length + 1
	# windowShape = (X.shape[0], Lmax)
	# windowSize = np.prod(windowShape)
	nLocs = X.shape[1] - windowLen + 1

	p0 = np.mean(X) # fraction of entries that are 1 (roughly)

	# intersections = computeIntersections(X, windowLen)
	# windowSims = np.sum(intersections, axis=2)
	# colSims = np.dot(X.T, X)
	colSims = np.dot(X.T, Xblur)
	filt = np.zeros((windowLen, windowLen)) + np.diag(np.ones(windowLen)) # zeros except 1s on diag
	windowSims = sig.convolve2d(colSims, filt, mode='valid')

	windowVects = vectorizeWindowLocs(X, windowLen)
	windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

	plt.figure()
	plt.imshow(windowSims, interpolation='nearest', aspect='auto')

	# plt.show()
	# return

	# ------------------------ find stuff

	# #
	# # Version where we we tell it k
	# #
	# bsfScore = 0
	# bsfLocs = None
	# bsfIntersection = None
	# # selfSims = np.diagonal(windowSims)
	# # candidateRowIdxs = np.where(selfSims * k0 <= bsfScore)[0]
	# # for i in candidateRowIdxs:
	# # 	row = windowSims[i]
	# for i, row in enumerate(windowSims):
	# 	if i % 20 == 0:
	# 		print("computing stuff for row {}".format(i))
	# 	if windowSims[i, i] * k0 <= bsfScore:
	# 		continue
	# 	idxs = sub.optimalAlignK(row, Lmin, k0)
	# 	intersection = windowVects[i]
	# 	sz = 0
	# 	for idx in idxs:
	# 		intersection = np.minimum(intersection, windowVectsBlur[idx])
	# 		sz = np.sum(intersection)
	# 		if sz * k0 <= bsfScore:
	# 			break
	# 	score = sz * k0
	# 	if score > bsfScore:
	# 		print("window {0}, k={1}, score={2} is the new best!".format(i, k0, score))
	# 		bsfScore = score
	# 		bsfLocs = idxs
	# 		bsfIntersection = np.copy(intersection)

	# #
	# # Version where we look for similarities to orig seq
	# #
	# bsfScore = 0
	# bsfLocs = None
	# bsfIntersection = None
	# for i, row in enumerate(windowSims):
	# 	if i % 20 == 0:
	# 		print("computing stuff for row {}".format(i))
	# 	# early abandon if this location has so little stuff that no
	# 	# intersection with it can possibly beat the best score
	# 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
	# 		# print("immediately abandoning window {}!".format(i))
	# 		continue
	# 	# print("not abandoning window {}!".format(i))
	# 	# best combination of idxs such that none are within Lmin of each other
	# 	idxs = sub.optimalAlignment(row, Lmin)
	# 	# print i, ": ", idxs
	# 	# order idxs by descending order of associated score
	# 	sizes = windowSims[i, idxs]
	# 	sortedSizesOrder = np.argsort(sizes)[::-1]
	# 	sortedIdxs = idxs[sortedSizesOrder]
	# 	# iteratively intersect with another near neighbor, compute the
	# 	# associated score, and check if it's better (or if we can early abandon)
	# 	intersection = windowVects[i]
	# 	numIdxs = len(sortedIdxs)
	# 	for j, idx in enumerate(sortedIdxs):
	# 		k = j + 1
	# 		intersection = np.minimum(intersection, windowVectsBlur[idx])
	# 		sz = np.sum(intersection) # use apodization window
	# 		# sz = np.count_nonzero(intersection) # just max-pool
	# 		score = sz * k
	# 		if k > 1 and score > bsfScore:
	# 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
	# 			print("sortedIdxs = {}".format(str(sortedIdxs)))
	# 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
	# 			print("------------------------")
	# 			bsfScore = score
	# 			bsfLocs = sortedIdxs[:k]
	# 			bsfIntersection = np.copy(intersection)
	# 		# early abandon if this can't possibly beat the best score, which
	# 		# is the case exactly when the intersection is so small that perfect
	# 		# matches at all future locations still wouldn't be good enough
	# 		elif sz * numIdxs <= bsfScore:
	# 			# print("early abandoning window {} at k={}".format(i, k))
	# 			break

	# #
	# # Version where we look for similarities to orig seq and use nearest
	# # enemy dist as M0
	# #
	# bsfScore = 0
	# bsfLocs = None
	# bsfIntersection = None
	# for i, row in enumerate(windowSims):
	# 	if i % 20 == 0:
	# 		print("computing stuff for row {}".format(i))
	# 	# early abandon if this location has so little stuff that no
	# 	# intersection with it can possibly beat the best score
	# 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
	# 		# print("immediately abandoning window {}!".format(i))
	# 		continue

	# 	# best combination of idxs such that none are within Lmin of each other
	# 	# validRow = row[:(-length + 1)] # can't go past end of ts
	# 	# idxs = sub.optimalAlignment(validRow, Lmin)
	# 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

	# 	# order idxs by descending order of associated score
	# 	sizes = windowSims[i, idxs]
	# 	sortedSizesOrder = np.argsort(sizes)[::-1]
	# 	sortedIdxs = idxs[sortedSizesOrder]

	# 	# iteratively intersect with another near neighbor, compute the
	# 	# associated score, and check if it's better (or if we can early abandon)
	# 	intersection = windowVects[i]
	# 	numIdxs = len(sortedIdxs)
	# 	# allZeros = np.zeros(intersection.shape)
	# 	nextIntersection = np.minimum(intersection, windowVectsBlur[sortedIdxs[0]])
	# 	nextSz = np.sum(nextIntersection)
	# 	for j, idx in enumerate(sortedIdxs):
	# 		k = j + 1
	# 		intersection = np.copy(nextIntersection)
	# 		sz = nextSz
	# 		if k < numIdxs:
	# 			nextIdx = sortedIdxs[k] # since k = j+1
	# 			nextIntersection = np.minimum(intersection, windowVectsBlur[nextIdx])
	# 			nextSz = np.sum(nextIntersection) # sum -> use apodization window
	# 		else:
	# 			nextSz = sz * p0

	# 		score = (sz - nextSz) * k
	# 		if k > 1 and score > bsfScore:
	# 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
	# 			print("sortedIdxs = {}".format(str(sortedIdxs)))
	# 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
	# 			print("------------------------")
	# 			bsfScore = score
	# 			bsfLocs = sortedIdxs[:k]
	# 			bsfIntersection = np.copy(intersection)
	# 		# early abandon if this can't possibly beat the best score, which
	# 		# is the case exactly when the intersection is so small that perfect
	# 		# matches at all future locations still wouldn't be good enough
	# 		elif sz * numIdxs <= bsfScore:
	# 			# print("early abandoning window {} at k={}".format(i, k))
	# 			break

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection
	#
	bsfScore = 0
	bsfLocs = None
	bsfIntersection = None
	for i, row in enumerate(windowSims):
		if i % 20 == 0:
			print("computing stuff for row {}".format(i))
		# early abandon if this location has so little stuff that no
		# intersection with it can possibly beat the best score
		if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
			continue

		# best combination of idxs such that none are within Lmin of each other
		# validRow = row[:(-length + 1)] # can't go past end of ts
		# idxs = sub.optimalAlignment(validRow, Lmin)
		idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

		# order idxs by descending order of associated score
		sizes = windowSims[i, idxs]
		sortedSizesOrder = np.argsort(sizes)[::-1]
		sortedIdxs = idxs[sortedSizesOrder]

		# iteratively intersect with another near neighbor, compute the
		# associated score, and check if it's better (or if we can early abandon)
		intersection = windowVects[i]
		numIdxs = len(sortedIdxs)
		nextSz = np.sum(intersection)
		nextFilt = np.array(intersection, dtype=np.float)
		nextFiltSum = np.array(nextFilt, dtype=np.float)
		for j, idx in enumerate(sortedIdxs):
			k = j + 1
			filt = np.copy(nextFilt)
			sz = nextSz
			if k < numIdxs:
				nextIdx = sortedIdxs[k] # since k = j+1
				nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
				nextFiltSum += nextIntersection
				nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
				# nextSz = np.sum(nextFilt) # big even if like no intersection...
				nextSz = np.sum(nextIntersection)
				bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
				nextSz = np.sum(bigEnoughIntersection)
			else:
				nextSz = sz * p0

			score = (sz - nextSz) * k
			if k > 1 and score > bsfScore:
				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
				print("sortedIdxs = {}".format(str(sortedIdxs)))
				print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
				print("------------------------")
				bsfScore = score
				bsfLocs = sortedIdxs[:k]
				bsfIntersection = np.copy(filt)
			# early abandon if this can't possibly beat the best score, which
			# is the case exactly when the intersection is so small that perfect
			# matches at all future locations still wouldn't be good enough
			elif sz * numIdxs <= bsfScore:
				# TODO can we actually early abandon here? next window loc
				# could increase filt, and thus score for a given loc isn't
				# necessarily non-increasing...
				# 	-can't abandon using this test, but pretty sure there's
				# 	a lower bound to be had here somewhere
				# print("early abandoning window {} at k={}".format(i, k))
				break

	#
	# Version where we look for similarities to orig seq and use nearest
	# enemy dist as M0, and use mean values instead of intersection,
	# and don't sort the indices, but instead care about overlap
	#
	# bsfScore = 0
	# bsfLocs = None
	# bsfIntersection = None
	# for i, row in enumerate(windowSims):
	# 	if i % 20 == 0:
	# 		print("computing stuff for row {}".format(i))
	# 	# early abandon if this location has so little stuff that no
	# 	# intersection with it can possibly beat the best score
	# 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
	# 		continue

	# 	# best combination of idxs such that none are within Lmin of each other
	# 	# validRow = row[:(-length + 1)] # can't go past end of ts
	# 	# idxs = sub.optimalAlignment(validRow, Lmin)
	# 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

	# 	# order idxs by descending order of associated score
	# 	sizes = windowSims[i, idxs]
	# 	sortedSizesOrder = np.argsort(sizes)[::-1]
	# 	sortedIdxs = idxs[sortedSizesOrder]

	# 	# iteratively intersect with another near neighbor, compute the
	# 	# associated score, and check if it's better (or if we can early abandon)
	# 	intersection = windowVects[i]
	# 	numIdxs = len(sortedIdxs)
	# 	nextSz = np.sum(intersection)
	# 	nextFilt = np.array(intersection, dtype=np.float)
	# 	nextFiltSum = np.array(nextFilt, dtype=np.float)
	# 	for j, idx in enumerate(sortedIdxs):
	# 		k = j + 1
	# 		filt = np.copy(nextFilt)
	# 		sz = nextSz
	# 		if k < numIdxs:
	# 			nextIdx = sortedIdxs[k] # since k = j+1
	# 			nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
	# 			nextFiltSum += nextIntersection
	# 			nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
	# 			# nextSz = np.sum(nextFilt) # big even if like no intersection...
	# 			nextSz = np.sum(nextIntersection)
	# 			bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
	# 			nextSz = np.sum(bigEnoughIntersection)
	# 		else:
	# 			nextSz = sz * p0

	# 		score = (sz - nextSz) * k
	# 		if k > 1 and score > bsfScore:
	# 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
	# 			print("sortedIdxs = {}".format(str(sortedIdxs)))
	# 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
	# 			print("------------------------")
	# 			bsfScore = score
	# 			bsfLocs = sortedIdxs[:k]
	# 			bsfIntersection = np.copy(filt)
	# 		# early abandon if this can't possibly beat the best score, which
	# 		# is the case exactly when the intersection is so small that perfect
	# 		# matches at all future locations still wouldn't be good enough
	# 		elif sz * numIdxs <= bsfScore:
	# 			# TODO can we actually early abandon here? next window loc
	# 			# could increase filt, and thus score for a given loc isn't
	# 			# necessarily non-increasing...
	# 			# 	-can't abandon using this test, but pretty sure there's
	# 			# 	a lower bound to be had here somewhere
	# 			# print("early abandoning window {} at k={}".format(i, k))
	# 			break

	# ------------------------ recover original ts

	bsfIntersection *= bsfIntersection >= minSim
	bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
	sums = np.sum(bsfIntersectionWindow, axis=0)

	kBest = len(bsfLocs)
	p0 = np.power(p0, kBest)
	# expectedOnesPerCol = p0 * X.shape[1]
	expectedOnesPerCol = p0 * X.shape[1] * 2
	sums -= expectedOnesPerCol

	plt.plot(sums)

	start, end, _ = maxSubarray(sums)
	patStart, patEnd = start, end + 1 + length

	# ================================ show output

	print "bestScore = {}".format(bsfScore)
	print "bestLocations = {}".format(str(bsfLocs))

	for idx in bsfLocs:
		viz.plotRect(axSim, idx, idx+windowLen)

	# print bsfIntersectionWindow.shape
	# print sums.shape

	# plt.plot(sums)
	# viz.plotRect(plt.gca(), start, end + 1)
	for idx in bsfLocs:
		viz.plotRect(axSeq, idx + patStart, idx + patEnd)

	plt.figure()
	plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

	plt.tight_layout()
	plt.show()

Exemplo n.º 8

0

Exibir arquivo

Arquivo: ff5.py Projeto: yiweichung/extract

def main():
    # np.random.seed(123)

    # ================================ consts for everything
    # consts for generating data
    # n = 1000
    n = 500
    # n = 300
    # length = 8
    # length = 16
    length = 32
    # length = 50
    # nInstances = 3
    exampleLengths = [55, 60, 65]
    # exampleLengths = [60, 60, 60]
    noiseStd = .5

    # consts for algorithm
    Lmin = max(20, length)  # only needed for optimalAlignK() spacing
    Lmax = 100  # loose upper bound on pattern length
    minSim = .8  # loose cutoff for what counts as similar

    k0 = len(exampleLengths)  # for version where we tell it k

    # ------------------------ synthetic data

    # seq = synth.randconst(n, std=noiseStd)
    # seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
    seq = synth.notSoRandomWalk(n,
                                std=noiseStd,
                                trendFilterLength=80,
                                lpfLength=16)
    seq = embedExamples(seq, exampleLengths)

    # ------------------------ msrc

    # from ..datasets import read_msrc as msrc
    # idxs = [2]
    # recordings = msrc.getRecordings(idxs=idxs)
    # r = list(recordings)[0]
    # seq = r.data[:, 20:23]
    # print "orig seq shape", seq.shape
    # seq = ar.downsampleMat(seq, rowsBy=10)
    # print "downsampled seq shape", seq.shape
    # length = 8
    # Lmin = len(seq) / 20
    # Lmax = len(seq) / 10
    # # Lmax = len(seq) / 20
    # k0 = 10
    # minSim = .5

    # noise = synth.randconst(seq.shape) # add noise for debugging
    # seq = np.r_[noise, seq, noise]

    # ================================ simMat

    X = computeSimMat(seq, length)
    # X[X < minSim] = 0.

    # X = ff2.localMaxFilterSimMat(X)
    # maxPoolWidth = min(length-1, Lmin-1)
    # maxPoolWidth /= 2
    # X = filters.maximum_filter1d(X, maxPoolWidth, axis=1)
    # X = filters.maximum_filter1d(X, length-1, axis=1)
    # X = filters.maximum_filter1d(X, length/2, axis=1)
    # X = filters.maximum_filter1d(X, 3, axis=1)
    # X = np.array(X > minSim, dtype=np.float)
    # X = X > minSim
    X[X < minSim] = 0.
    # X = ff2.filterSimMat(X, length-1, 'hamming', scaleFilterMethod='max1')

    # X = sub.removeCorrelatedRows(X, .9, accumulate=True) # correlation > .9 -> kill it
    # X = sub.removeCorrelatedRows(X, .9, accumulate=False) # correlation > .9 -> kill it

    print "simMat dims:", X.shape
    print "simMat nonzeros, total, frac = ", np.count_nonzero(
        X), X.size, np.count_nonzero(X) / float(X.size)

    # ================================ plotting crap

    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    axSim.imshow(synth.appendZeros(X, length - 1),
                 interpolation='nearest',
                 aspect='auto')
    # im = axSim.imshow(synth.appendZeros(X, length-1), interpolation='nearest', aspect='auto')
    # plt.colorbar(im, cax=axSim)

    axSeq.set_title("Time Series")
    axSim.set_title("Similarities Matrix")

    # ================================ science

    # ------------------------ derived stats
    kMax = int(X.shape[1] / Lmin + .5)
    windowWidth = Lmax - length + 1
    # windowShape = (X.shape[0], Lmax)
    # windowSize = np.prod(windowShape)
    nLocs = X.shape[1] - windowWidth + 1

    # ------------------------ pairwise sims
    # colSims = np.dot(X.T, X)
    # filt = np.zeros((Lmax, Lmax)) + np.diag(np.ones(Lmax)) # zeros except 1s on diag
    # windowSims = sig.convolve2d(colSims, filt, mode='valid')

    print "computing intersections..."

    windowVects = vectorizeWindowLocs(X, windowWidth)
    windowSz = windowVects.shape[1]

    intersections = computeIntersections(X, windowVects, windowWidth)
    windowSims = np.sum(intersections, axis=2)
    # windowSims /= windowSz

    # assert(np.array_equal(windowSims, windowSims2)) # works

    # plt.figure()
    # plt.imshow(windowSims2)
    plt.figure()
    plt.imshow(windowSims / windowSz)
    plt.colorbar()

    # plt.show()
    # return

    print "computing similarity lower bound..."

    # TODO maybe try introducing beta prior to weight different values of k
    # 	-or, alternatively, see what happens if we tell it the right k

    #
    # Version where we look for stuff matching each intersection
    #

    # initialize with closest pair at least Lmin apart
    nonTrivialWindowSims = np.triu(windowSims)  # zero lower half
    for i in range(nLocs):
        nonTrivialWindowSims[i,
                             i:min(nLocs, i + Lmin)] = 0  # zero Lmin past diag
    highestSimIdx = np.argmax(nonTrivialWindowSims)

    bsfLocs = sorted([highestSimIdx // nLocs, highestSimIdx % nLocs])
    bsfScore = windowSims[tuple(bsfLocs)] * 2  # list will yield a list
    bsfIntersection = intersections[bsfLocs]

    print "finding best locations..."

    # rowIntersectionSims = np.zeros(nLocs)
    # for i in range(nLocs):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	bestPossibleScores = windowSims[i,min(nLocs,i+Lmin):] * kMax / 2.
    # 	candidateIdxs = np.where(bestPossibleScores > bsfScore)[0]
    # 	# print "candidateIdxs shape", candidateIdxs.shape
    # 	for j in candidateIdxs:
    # 		intersection = intersections[i, j]
    # 		rowIntersectionSims *= 0
    # 		rowIntersectionSims[candidateIdxs] = np.dot(intersections[i, candidateIdxs], intersection)
    # 		idxs = sub.optimalAlignment(rowIntersectionSims, Lmin)

    # 		# order idxs by descending order of associated score
    # 		sizes = rowIntersectionSims[idxs]
    # 		sortedSizesOrder = np.argsort(sizes)[::-1]
    # 		sortedIdxs = idxs[sortedSizesOrder]

    # 		# iteratively intersect with another near neighbor, compute the
    # 		# associated score, and check if it's better (or if we can early abandon)
    # 		numIdxs = len(sortedIdxs)
    # 		k = 2
    # 		for idx in sortedIdxs[2:]: # first 2 are no better than orig intersection
    # 			k += 1
    # 			intersection = np.logical_and(intersection, intersections[i, idx])
    # 			sz = np.sum(intersection)
    # 			score = sz * k
    # 			if score > bsfScore:
    # 				print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 				bsfScore = score
    # 				bsfLocs = sortedIdxs[:k]
    # 				bsfIntersection = np.copy(intersection)
    # 			elif sz * numIdxs <= bsfScore:
    # 				# print("early abandoning window {} at k={}".format(i, k))
    # 				break

    #
    # Version where we look for similarities to orig seq
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print("computing stuff for row {}".format(i))
        # early abandon if this location has so little stuff that no
        # intersection with it can possibly beat the best score
        if windowSims[
                i,
                i] * kMax <= bsfScore:  # highest score is kMax identical locs
            # print("immediately abandoning window {}!".format(i))
            continue
        # print("not abandoning window {}!".format(i))
        # best combination of idxs such that none are within Lmin of each other
        idxs = sub.optimalAlignment(row, Lmin)
        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]
        # retrieve intersection and compute score for best 2 locs
        k = 2
        intersection = intersections[sortedIdxs[0], sortedIdxs[1]]
        score = windowSims[sortedIdxs[0], sortedIdxs[1]] * k
        # possibly update best-so-far score and window locations
        if score > bsfScore:
            bsfScore = score
            bsfLocs = sortedIdxs[:k]
            bsfIntersection = np.copy(intersection)
        # iteratively intersect with another near neighbor, compute the
        # associated score, and check if it's better (or if we can early abandon)
        numIdxs = len(sortedIdxs)
        for idx in sortedIdxs[2:]:
            k += 1
            intersection = np.logical_and(intersection, windowVects[idx])
            sz = np.count_nonzero(intersection)
            score = sz * k
            if score > bsfScore:
                print("window {0}, k={1}, score={2} is the new best!".format(
                    i, k, score))
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(intersection)
            # early abandon if this can't possibly beat the best score, which
            # is the case exactly when the intersection is so small that perfect
            # matches at all future locations still wouldn't be good enough
            elif sz * numIdxs <= bsfScore:
                # print("early abandoning window {} at k={}".format(i, k))
                break

    #
    # Version where we we tell it k
    #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # # selfSims = np.diagonal(windowSims)
    # # candidateRowIdxs = np.where(selfSims * k0 <= bsfScore)[0]
    # # for i in candidateRowIdxs:
    # # 	row = windowSims[i]
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	if windowSims[i,i] * k0 <= bsfScore:
    # 		continue
    # 	idxs = sub.optimalAlignK(row, Lmin, k0)
    # 	intersection = intersections[idxs[0], idxs[1]]
    # 	sz = 0
    # 	for idx in idxs[2:]:
    # 		intersection = np.logical_and(intersection, windowVects[idx])
    # 		sz = np.count_nonzero(intersection)
    # 		if sz * k0 <= bsfScore:
    # 			break
    # 	score = sz * k0
    # 	if score > bsfScore:
    # 		print("window {0}, k={1}, score={2} is the new best!".format(i, k0, score))
    # 		bsfScore = score
    # 		bsfLocs = idxs
    # 		bsfIntersection = np.copy(intersection)

    # ================================ show output

    print "bestScore = {}".format(bsfScore)
    print "bestLocations = {}".format(str(bsfLocs))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowWidth)

    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowWidth))
    sums = np.sum(bsfIntersectionWindow, axis=0)
    print bsfIntersectionWindow.shape
    print sums.shape

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')
    plt.colorbar()

    plt.figure()
    plt.plot(sums)

    p0 = np.mean(X)
    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    # expectedOnesPerCol = p0 * X.shape[1]
    expectedOnesPerCol = p0 * X.shape[1] * 2
    sums -= expectedOnesPerCol

    plt.plot(sums)

    start, end, _ = maxSubarray(sums)
    patStart, patEnd = start, end + 1 + length
    viz.plotRect(plt.gca(), start, end + 1)
    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

    # plt.figure()
    # # windowSims[bestRowIdx, bestColIdxs] *= 10 # color these differently
    # # plt.imshow(windowSims, interpolation='none')
    # plt.imshow(windowSims)
    # plt.colorbar()

    # # for col in range(colSims.shape[1]):
    # bestRowIdx = -1
    # bestColIdxs = []
    # bestSum = -1
    # kVals = np.arange(2,kMax)
    # for i, row in enumerate(windowSims):
    # 	optimalIdxs = sub.optimalAlignK(row, Lmin, kVals)
    # 	if not len(optimalIdxs):
    # 		continue
    # 	# print "optimalIdxs", optimalIdxs
    # 	sums = map(lambda idxs: np.sum(row[idxs]), optimalIdxs)
    # 	# print "sums", sums
    # 	sums = np.asarray(sums)
    # 	bestSumIdx = np.argmax(sums)
    # 	if sums[bestSumIdx] > bestSum:
    # 		bestRowIdx = i
    # 		bestColIdxs = optimalIdxs[bestSumIdx]
    # 		bestSum = sums[bestSumIdx]

    # print "bestRow = ", bestRowIdx
    # print "best end locs = ", bestColIdxs

    plt.tight_layout()
    plt.show()

Exemplo n.º 9

0

Exibir arquivo

def main():
    # np.random.seed(123)

    # ================================ consts for everything
    # consts for generating data
    # n = 1000
    n = 500
    # n = 300
    # length = 8
    # length = 16
    # length = 32
    # length = 50
    # nInstances = 3
    exampleLengths = [55, 60, 65]
    # exampleLengths = [60, 60, 60]
    noiseStd = .5

    # consts for algorithm
    # Lmin = max(20, length)	# only needed for optimalAlignK() spacing
    Lmin = 20  # only needed for optimalAlignK() spacing
    Lmax = 100  # loose upper bound on pattern length
    # minSim = .5
    minSim = 0.
    length = Lmin // 2
    # length = Lmin // 4
    # length = 3

    answerIdxs = None

    USE_MSRC = True
    # USE_MSRC = False

    # ================================ data

    # ------------------------ synthetic data

    # seq = synth.randconst(n, std=noiseStd)
    seq = synth.randwalk(n, std=noiseStd)
    # seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=4)
    # seq = synth.notSoRandomWalk(n, std=noiseStd, trendFilterLength=80, lpfLength=16)
    seq = embedExamples(seq, exampleLengths)
    # seq = synth.appendZeros(seq, Lmax)

    # ------------------------ msrc

    if USE_MSRC:
        from ..datasets import read_msrc as msrc
        # idxs = [0]
        # idxs = [1]
        # idxs = [2]
        # idxs = [7] # length 1500, but instances of length like 20
        # idxs = [8] # gets owned on this one cuz patterns of length like 100
        # idxs = [9] # missing an annotation, it appears
        idxs = [10]  # something crazy about feature rep here # TODO fix
        # idxs = [11] # crap cuz bad, low-variance signals
        # idxs = [12] # has garbagey sections like [10]
        # idxs = [13] # empty feature mat # TODO
        # idxs = [14]
        downsampleBy = 2
        # downsampleBy = 1
        recordings = msrc.getRecordings(idxs=idxs)
        r = list(recordings)[0]
        # seq = r.data
        # seq = r.data[:, :40]
        # seq = r.data[:, 20:23]
        seq = r.data[:, 24:27]
        # seq = r.data[:, 20:27]
        print "orig seq shape", seq.shape
        seq = ar.downsampleMat(seq, rowsBy=downsampleBy)
        print "downsampled seq shape", seq.shape
        # length = max(8, Lmin / 2)
        Lmin = len(seq) // 20
        # Lmax = len(seq) // 8
        Lmax = len(seq) // 10
        length = Lmin // 2
        # Lmax = len(seq) / 20
        # k0 = 10
        # minSim = .5
        answerIdxs = r.gestureIdxs / downsampleBy
        # print "seq shape", seq.shape
        prePadLen = Lmax - length
        # postPadLen = length - 1
        postPadLen = Lmax - length
        first = np.tile(seq[0], (prePadLen, 1))
        last = np.tile(seq[-1], (postPadLen, 1))
        seq = np.vstack(
            (first, seq,
             last))  # pad with fixed val to allow all window positions
        # ^ TODO pad simMat with zeros instead--this introduces fake subseqs
        answerIdxs += prePadLen
        # seq = np.vstack((seq, np.tile(flat, (length-1, 1)))) # lets it get the last rep
        # print "seq shape", seq.shape

    # ================================ feature construction

    logMaxLength = int(np.floor(np.log2(Lmax)))
    # logMaxLength = int(np.ceil(np.log2(Lmax)))
    # logMinLength = 3 # -> length 8
    # logMinLength = 4 # -> length 16
    logMinLength = int(np.floor(np.log2(Lmin)))
    lengths = np.arange(logMinLength, logMaxLength + 1)
    lengths = 2**lengths
    # lengths = [16]

    cardinality = 8
    breakpoints = rep.saxBreakpoints(cardinality)

    X = rep.multiNormalizeAndSparseQuantize(seq, lengths, breakpoints)
    # X = rep.multiSparseLineProject(seq, lengths, breakpoints, removeZeroRows=False)

    # lengths2 = np.arange(3, logMaxLength + 1)
    # lengths2 = 2 ** lengths2
    lengths2 = lengths  # TODO uncomment after debug
    # lengths2 = [8, 32]

    # breakpoints2 = rep.defaultSparseLineBreakpoints(seq, scaleHowMany=2)
    breakpoints2 = rep.defaultSparseLineBreakpoints(seq)
    X2 = rep.multiSparseLineProject(seq, lengths2, breakpoints2)
    # X2 = X2 > minSim
    X2 = X2 > 0.  # ignore correlations

    # print "shapes:"
    # print X.shape
    # print X2.shape

    X = np.vstack((X, X2))

    # plt.figure()
    # # viz.imshowBetter(X)
    # viz.imshowBetter(X2)
    # plt.figure()
    # viz.imshowBetter(X2 > 0.)
    # plt.show()

    # print seq.shape
    # plt.figure()

    # plt.plot(seq[:,0]) # bit of pattern, but only varies between -.4 and .2

    # okay, so 1st dim is all zeros
    # variances = rep.slidingVariance(seq, 8)
    # for dim in range(len(variances)):
    # 	plt.figure()
    # 	plt.plot(variances[dim].flatten())

    # print variances.shape
    # variances = rep.vstack3Tensor(variances.T)
    # print variances.shape
    # plt.plot(variances)

    # plt.show()
    # return

    X = localMaxFilterSimMat(X)
    # Xbool = np.copy(X)
    featureMeans = np.mean(X, axis=1).reshape((-1, 1))
    # print featureMeans
    X *= -np.log2(featureMeans)  # variable encoding costs for rows
    # X /= -np.log(featureMeans)
    # Xblur = localMaxFilterSimMat(X) # try only maxFiltering Xblur
    Xblur = filterSimMat(X, length - 1, 'hamming', scaleFilterMethod='max1')

    # plt.figure()
    # viz.imshowBetter(X)
    # plt.figure()
    # viz.imshowBetter(Xblur)

    print "featureMat dims:", X.shape
    Xnonzeros = np.count_nonzero(X)
    print "featureMat nonzeros, total, frac = ", Xnonzeros, X.size, Xnonzeros / float(
        X.size)

    # plt.show()
    # return

    # ================================ plotting crap

    plt.figure()
    axSeq = plt.subplot2grid((4, 1), (0, 0))
    axSim = plt.subplot2grid((4, 1), (1, 0), rowspan=3)
    for ax in (axSeq, axSim):
        ax.autoscale(tight=True)
    axSeq.plot(seq)
    # if answerIdxs is not None:
    # 	for idx in answerIdxs:
    # 		viz.plotVertLine(idx, ax=axSeq)
    padLen = len(seq) - X.shape[1]
    Xpad = synth.appendZeros(X, padLen)
    axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
    # im = axSim.imshow(Xpad, interpolation='nearest', aspect='auto')
    # plt.colorbar(im, cax=axSim)

    axSeq.set_title("Time Series")
    axSim.set_title("Feature Matrix")

    # plt.show()
    # return

    # ================================ science

    # ------------------------ derived stats
    kMax = int(X.shape[1] / Lmin + .5)
    windowLen = Lmax - length + 1

    p0 = np.mean(X)  # fraction of entries that are 1 (roughly)
    # p0 = 2 * np.mean(X) # lambda for l0 reg based on features being bernoulli at 2 locs
    minSim = p0
    # p0 = -np.log(np.mean(Xbool)) # fraction of entries that are 1 (roughly)
    # noiseSz = p0 * X.shape[0] * windowLen # way too hard to beat
    expectedOnesPerWindow = p0 * X.shape[0] * windowLen
    noiseSz = p0 * expectedOnesPerWindow  # num ones to begin with

    # intersections = computeIntersections(X, windowLen)
    # windowSims = np.sum(intersections, axis=2)
    # colSims = np.dot(X.T, X)
    colSims = np.dot(X.T, Xblur)
    filt = np.zeros((windowLen, windowLen)) + np.diag(
        np.ones(windowLen))  # zeros except 1s on diag
    windowSims = sig.convolve2d(colSims, filt, mode='valid')

    windowVects = vectorizeWindowLocs(X, windowLen)
    windowVectsBlur = vectorizeWindowLocs(Xblur, windowLen)

    # plt.figure()
    # plt.imshow(windowSims, interpolation='nearest', aspect='auto')

    # ------------------------ find stuff

    #
    # Version where we look for similarities to orig seq and use nearest
    # enemy dist as M0, and use mean values instead of intersection
    #
    bsfScore = 0
    bsfLocs = None
    bsfIntersection = None
    for i, row in enumerate(windowSims):
        if i % 20 == 0:
            print("computing stuff for row {}".format(i))
        # early abandon if this location has so little stuff that no
        # intersection with it can possibly beat the best score
        if windowSims[
                i,
                i] * kMax <= bsfScore:  # highest score is kMax identical locs
            continue

        # best combination of idxs such that none are within Lmin of each other
        # validRow = row[:(-length + 1)] # can't go past end of ts
        # idxs = sub.optimalAlignment(validRow, Lmin)
        idxs = sub.optimalAlignment(row,
                                    Lmin)  # goes past end of ts, but better

        # order idxs by descending order of associated score
        sizes = windowSims[i, idxs]
        sortedSizesOrder = np.argsort(sizes)[::-1]
        sortedIdxs = idxs[sortedSizesOrder]

        # iteratively intersect with another near neighbor, compute the
        # associated score, and check if it's better (or if we can early abandon)
        intersection = windowVects[i]
        numIdxs = len(sortedIdxs)
        nextSz = np.sum(intersection)
        nextFilt = np.array(intersection, dtype=np.float)
        nextFiltSum = np.array(nextFilt, dtype=np.float)
        for j, idx in enumerate(sortedIdxs):
            k = j + 1
            filt = np.copy(nextFilt)
            sz = nextSz
            if k < numIdxs:
                nextIdx = sortedIdxs[k]  # since k = j+1
                nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
                nextFiltSum += nextIntersection
                nextFilt = nextFiltSum / (
                    k + 1)  # avg value of each feature in intersections
                # nextSz = np.sum(nextFilt) # big even if like no intersection...
                nextSz = np.sum(nextIntersection)
                bigEnoughIntersection = nextIntersection[
                    nextIntersection > minSim]
                nextSz = np.sum(bigEnoughIntersection)
            else:
                nextSz = sz * p0
                # nextSz = -1
            enemySz = max(nextSz, noiseSz)

            score = (sz - enemySz) * k
            if k > 1 and score > bsfScore:
                print("window {0}, k={1}, score={2} is the new best!".format(
                    i, k, score))
                print("sortedIdxs = {}".format(str(sortedIdxs)))
                print("sortedIdxScores = {}".format(
                    str(windowSims[i, sortedIdxs])))
                print("------------------------")
                bsfScore = score
                bsfLocs = sortedIdxs[:k]
                bsfIntersection = np.copy(filt)
            # early abandon if this can't possibly beat the best score, which
            # is the case exactly when the intersection is so small that perfect
            # matches at all future locations still wouldn't be good enough
            elif sz * numIdxs <= bsfScore:
                # TODO can we actually early abandon here? next window loc
                # could increase filt, and thus score for a given loc isn't
                # necessarily non-increasing...
                # 	-can't abandon using this test, but pretty sure there's
                # 	a lower bound to be had here somewhere
                # print("early abandoning window {} at k={}".format(i, k))
                break
            elif noiseSz > nextSz:
                break

    # #
    # # Version where we look for similarities to orig seq and use nearest
    # # enemy dist as M0, and use mean values instead of intersection,
    # # and don't sort the indices, but instead care about overlap
    # #
    # bsfScore = 0
    # bsfLocs = None
    # bsfIntersection = None
    # for i, row in enumerate(windowSims):
    # 	if i % 20 == 0:
    # 		print("computing stuff for row {}".format(i))
    # 	# early abandon if this location has so little stuff that no
    # 	# intersection with it can possibly beat the best score
    # 	if windowSims[i,i] * kMax <= bsfScore: # highest score is kMax identical locs
    # 		continue

    # 	# best combination of idxs such that none are within Lmin of each other
    # 	# validRow = row[:(-length + 1)] # can't go past end of ts
    # 	# idxs = sub.optimalAlignment(validRow, Lmin)
    # 	idxs = sub.optimalAlignment(row, Lmin) # goes past end of ts, but better

    # 	# order idxs by descending order of associated score
    # 	sizes = windowSims[i, idxs]
    # 	sortedSizesOrder = np.argsort(sizes)[::-1]
    # 	sortedIdxs = idxs[sortedSizesOrder]

    # 	# iteratively intersect with another near neighbor, compute the
    # 	# associated score, and check if it's better (or if we can early abandon)
    # 	intersection = windowVects[i]
    # 	numIdxs = len(sortedIdxs)
    # 	nextSz = np.sum(intersection)
    # 	nextFilt = np.array(intersection, dtype=np.float)
    # 	nextFiltSum = np.array(nextFilt, dtype=np.float)
    # 	for j, idx in enumerate(sortedIdxs):
    # 		k = j + 1
    # 		filt = np.copy(nextFilt)
    # 		sz = nextSz
    # 		if k < numIdxs:
    # 			nextIdx = sortedIdxs[k] # since k = j+1
    # 			nextIntersection = np.minimum(filt, windowVectsBlur[nextIdx])
    # 			nextFiltSum += nextIntersection
    # 			nextFilt = nextFiltSum / (k+1)  # avg value of each feature in intersections
    # 			# nextSz = np.sum(nextFilt) # big even if like no intersection...
    # 			nextSz = np.sum(nextIntersection)
    # 			bigEnoughIntersection = nextIntersection[nextIntersection > minSim]
    # 			nextSz = np.sum(bigEnoughIntersection)
    # 		else:
    # 			nextSz = sz * p0

    # 		score = (sz - nextSz) * k
    # 		if k > 1 and score > bsfScore:
    # 			print("window {0}, k={1}, score={2} is the new best!".format(i, k, score))
    # 			print("sortedIdxs = {}".format(str(sortedIdxs)))
    # 			print("sortedIdxScores = {}".format(str(windowSims[i, sortedIdxs])))
    # 			print("------------------------")
    # 			bsfScore = score
    # 			bsfLocs = sortedIdxs[:k]
    # 			bsfIntersection = np.copy(filt)
    # 		# early abandon if this can't possibly beat the best score, which
    # 		# is the case exactly when the intersection is so small that perfect
    # 		# matches at all future locations still wouldn't be good enough
    # 		elif sz * numIdxs <= bsfScore:
    # 			# TODO can we actually early abandon here? next window loc
    # 			# could increase filt, and thus score for a given loc isn't
    # 			# necessarily non-increasing...
    # 			# 	-can't abandon using this test, but pretty sure there's
    # 			# 	a lower bound to be had here somewhere
    # 			# print("early abandoning window {} at k={}".format(i, k))
    # 			break

    # ------------------------ recover original ts

    bsfIntersection *= bsfIntersection >= minSim
    bsfIntersectionWindow = bsfIntersection.reshape((-1, windowLen))
    sums = np.sum(bsfIntersectionWindow, axis=0)

    kBest = len(bsfLocs)
    p0 = np.power(p0, kBest)
    # expectedOnesPerCol = p0 * X.shape[1] * 2
    # expectedOnesPerCol = p0 * X.shape[1]
    expectedOnesPerCol = p0 * X.shape[0]
    sums -= expectedOnesPerCol

    # plt.figure()
    # plt.plot(sums)

    start, end, _ = maxSubarray(sums)
    # patStart, patEnd = start, end + 1 + length
    patStart, patEnd = start, end + 1
    # patStart, patEnd = start + length // 2, end + 1 + length

    # ================================ show output

    print "bestScore = {}".format(bsfScore)
    print "bestLocations = {}".format(str(bsfLocs))

    for idx in bsfLocs:
        viz.plotRect(axSim, idx, idx + windowLen)

    # print bsfIntersectionWindow.shape
    # print sums.shape

    # plt.plot(sums)
    # viz.plotRect(plt.gca(), start, end + 1)
    for idx in bsfLocs:
        viz.plotRect(axSeq, idx + patStart, idx + patEnd)

    if answerIdxs is not None:
        for idx in answerIdxs:
            viz.plotVertLine(idx, ax=axSeq)

    plt.figure()
    plt.imshow(bsfIntersectionWindow, interpolation='nearest', aspect='auto')

    plt.tight_layout()
    plt.show()