Exemplo n.º 1
0
def main():
	return

	from flock import learnFFfromSeq
	from dig import datasets

	np.random.seed(12345)

	# read in the first few time series from the TIDIGITS dataset; the return
	# value is a collection of LabeledTimeSeries (see datasets.utils). You
	# will of course need to have the relevant dataset on your machine, as
	# well as update datasets/paths.py to point to it. For TIDIGITS
	# specifically, you will also need to have librosa installed. For the
	# UCR datasets, the whichExamples argument takes this many examples from
	# all 20 datasets
	whichExamples = np.arange(2)
	tsList = datasets.loadDataset(datasets.TIDIGITS, whichExamples=whichExamples)

	# uncomment any of these to use a different dataset
	# tsList = datasets.loadDataset(datasets.DISHWASHER, whichExamples=whichExamples)
	# tsList = datasets.loadDataset(datasets.MSRC, whichExamples=whichExamples)
	# tsList = datasets.loadDataset(datasets.UCR, whichExamples=[0])

	Lmin, Lmax = 1./20, 1./10 # fractions of time series length
	for ts in tsList:
		startIdxs, endIdxs, model, featureMat, featureMatBlur = learnFFfromSeq(
			ts.data, Lmin, Lmax)
		plotFFOutput(ts, startIdxs, endIdxs, featureMat, model)

		# you can also call this if you just want to see what the data looks like
		# ts.plot()

		# plt.savefig(ts.name + '.pdf') # use this to save it

		plt.show()
Exemplo n.º 2
0
def main():
	# tsList = ds.loadDataset(ds.SINES)
	# tsList = ds.loadDataset(ds.MSRC, whichExamples=range(1))
	# tsList = ds.loadDataset(ds.MSRC, whichExamples=range(2))
	# tsList = ds.loadDataset(ds.DISHWASHER, whichExamples=range(1))
	tsList = ds.loadDataset(ds.DISHWASHER, whichExamples=range(5))
	# tsList = ds.loadDataset(ds.TIDIGITS, whichExamples=range(2))
	# tsList = ds.loadDataset(ds.TIDIGITS, whichExamples=range(10))

	for ts in tsList:

		# ts.data = ts.data[:, :5]
		# ts.data = ts.data[::2, :]

		# make it have 3 signals with increasing amounts of noise
		# n = ts.data.shape[0]
		# noiseDim1 = ts.data + np.random.randn(n, 1) * .25
		# noiseDim2 = np.random.randn(n).reshape((-1, 1))
		# ts.data = np.hstack((ts.data, noiseDim1, noiseDim2))

		# seq = ts.data.ravel().reshape((-1, 1))
		# print seq.shape

		# print ts.data.shape
		# return

		# Lmin = .1
		# Lmax = .2
		Lmin = .05
		Lmax = .1
		Lmin_int = int(Lmin * len(ts.data))
		Lmax_int = int(Lmax * len(ts.data))

		# ts.data = ts.data[:, ::5] # downsample by factor of 5

		# plt.plot(ts.data[:, :5])
		# plt.show()
		# return

		ff = dig.FlockLearner()
		ff.learn(ts.data.T, Lmin, Lmax)

		T = ff.getTimeSeries()
		Phi = ff.getFeatureMat()
		PhiBlur = ff.getBlurredFeatureMat()
		W = ff.getPattern()
		Wsum = ff.getPatternSum()
		starts = ff.getInstanceStartIdxs()
		ends = ff.getInstanceEndIdxs()
		windowLen = ff.getWindowLen()

		print "windowLen: ", windowLen

		# scores = ff.getSeedScores()
		seeds = ff.getSeeds()


		# startIdxs, endIdxs, model, X, Xblur = flock.learnFFfromSeq(ts.data, Lmin,
		# 	Lmax)


		# ts.plot()

		printCandidates = False
		if printCandidates:
			FPhi = Phi
			FPhiBlur = PhiBlur
			for FPhi, FPhiBlur in [(Phi, PhiBlur), (X, Xblur)]:
				for seed in seeds:

					# Lmax = int(len(ts.data))
					# dotProds = dig.dotProdsForSeed(FPhi, FPhiBlur, seed, windowLen)
					dotProds = dig.dotProdsForSeed(FPhi, FPhiBlur, seed, windowLen)
					dotProds2 = flock.dotProdsForSeed(FPhi, FPhiBlur, seed, windowLen)

					# plt.figure()
					# plt.plot(dotProds)
					# plt.plot(dotProds2)

					candidates = dig.candidatesFromDotProds(dotProds, Lmin_int)
					candidates2 = flock.candidatesFromDotProds(dotProds, Lmin_int)

					# candidates = ff.getCandidatesForSeed(FPhi, FPhiBlur, long(seed))
					# candidates2 = flock.get
					print "{}) {} vs {}".format(seed, candidates, candidates2)

					assert np.allclose(dotProds, dotProds2)

					# cpp impl seems to be much more accurate than python
					# version, which has a known bug in the func that
					# finds spaced relative maxima
					# if len(candidates) != len(candidates2):
					# 	plt.plot(dotProds)
					# 	plt.plot(dotProds2)
					# 	plt.show()
					# 	assert(False)

		# okay, so it's receiving, storing, and returning mats correctly
		# print "T shape", T.shape
		# ts.plot()
		# plt.plot(T.T)
		# plt.title("Orig TS and TS from FF object")

		# alright, seed scores look awesome...
		# scores = ff.getSeedScores()
		# plt.figure()
		# plt.plot(scores)

		# are structure score computations the same?
		# -> little bit of noise, but basically yes
		# subseqLen = 40
		# walks = dig.createRandomWalks(seq.ravel(), subseqLen, 100)
		# structureScores = dig.structureScores1D(seq.ravel(), subseqLen, walks)
		# structureScores2 = feat.windowScoresRandWalk(ts.data, subseqLen)
		# plt.figure()
		# plt.plot(structureScores / np.max(structureScores))
		# plt.plot(structureScores2)

		# are seed scores about right?
		# scores = ff.getSeedScores()
		# scores2 = flock._seedScores(seq, Lmin_int)
		# plt.figure()
		# plt.plot(scores)
		# plt.plot(scores2)


		# okay, so python impl nails it; clearly we have a bug somewhere...
		# startIdxs, endIdxs, model, X, Xblur = flock.learnFFfromSeq(ts.data, Lmin,
		# 	Lmax)
		# plt.figure()
		# viz.imshowBetter(model)
		# plt.colorbar()
		# plt.figure()
		# viz.imshowBetter(X)
		# plt.colorbar()
		# plt.figure()
		# viz.imshowBetter(Xblur)
		# plt.colorbar()

		print "cpp startIdxs:", starts
		print "cpp endIdxs:", ends
		# print "python startIdxs:", startIdxs
		# print "python endIdxs:", endIdxs

		# random walks seem to be good
		# walks = dig.createRandomWalks(ts.data.ravel(), 80, 100)
		# plt.plot(walks.T)
		# for walk in walks:
		# 	if np.abs(np.mean(walk)) > .0001:
		# 		print np.mean(walk)
		# 		print "walk is not zero-meaned!"

		# phi has reasonable shape (a few rows, same num cols = ts len)
		# print "Phi shape", Phi.shape
		# print "PhiBlur shape", PhiBlur.shape

		# print "Phi:"
		# for row in Phi[:20]:
		# 	print row

		ploFeatureMats = 0
		if ploFeatureMats:
			# yep, phi and phi_blur look right
			# well, sort of...phi consistently has fewer 1s...
			_, axes = plt.subplots(2,2)
			viz.imshowBetter(Phi, ax=axes[0,0])
			viz.imshowBetter(PhiBlur, ax=axes[1,0])
			viz.imshowBetter(X, ax=axes[0,1])
			viz.imshowBetter(Xblur, ax=axes[1,1])

		# ya, phi_blur and blurring of phi using python code are identical
		# Lfilt = int(Lmin * len(ts.data))
		# print "Lfilt", Lfilt
		# # PhiBlur2 = _filterRows(Phi, Lfilt)
		# _, PhiBlur2 = preprocessFeatureMat(Phi, Lfilt)

		# plt.figure()
		# viz.imshowBetter(PhiBlur2)
		# plt.colorbar()

		# for row in W:
		# 	print row

		# print "True pattern sum", Wsum
		# print "sum of rx'd pattern", np.sum(W)
		# print "W nonzeros: ", np.nonzero(W)

		# plt.figure()
		# viz.imshowBetter(W)
		# plt.colorbar()

		# print "python Phi shape, Phi sum, PhiBlur sum", X.shape, np.sum(X), np.sum(Xblur)
		print "cpp    Phi shape, Phi sum, PhiBlur sum", Phi.shape, np.sum(Phi), np.sum(PhiBlur)

		plotOutput = False
		# plotOutput = True
		if plotOutput:
			axSeq, axSim, axPattern = flockviz.plotFFOutput(ts, starts, ends, Phi, W)
			# flockviz.plotFFOutput(ts, starts, ends, PhiBlur, W)
			axSeq.set_title("cpp output")

			# axes = flockviz.plotFFOutput(ts, startIdxs, endIdxs, X, W)
			# axes[0].set_title("python output")

	plt.show()