def plotDataset(X, Y, name, img=False, normalizeCols=False, maxPerClass=np.inf, transforms=None):
    """Plot the examples of a dataset and save the resulting figure.

    Args:
        X: data handed to the plotting helper; passed through
            ``meanNormalizeCols`` first when ``normalizeCols`` is true.
        Y: labels handed to the plotting helper alongside ``X``.
        name: dataset name; used in the progress message, as the plot
            title, and as the name given to ``saveCurrentPlot``.
        img: when true, draw with ``imgExamples`` and set a figure-level
            suptitle; otherwise draw with ``plotExamples`` and set an
            axes title.
        normalizeCols: when true, mean-normalize ``X`` before plotting.
        maxPerClass: forwarded to the plotting helper; when finite it is
            also appended to the save suffix / subdirectory.
        transforms: optional iterable of objects exposing ``__name__``;
            forwarded to the plotting helper, and each name is appended
            to the save suffix / subdirectory.
    """
    print("plotting dataset: " + name + "...")

    if normalizeCols:
        X = meanNormalizeCols(X)

    if img:
        imgExamples(X, Y, maxPerClass, transforms)
        plt.suptitle(name)
    else:
        # NOTE(review): `normalize` is not defined inside this function;
        # presumably it is a module-level name -- confirm it was not meant
        # to be `normalizeCols`.
        plotExamples(X, Y, maxPerClass, normalize, transforms)
        plt.title(name)

    # The file suffix and the output subdirectory encode the same options,
    # so build the tag once and use it for both.
    tagParts = []
    if img:
        tagParts.append("_img")
    if normalizeCols:
        tagParts.append("_colnormalized")
    if maxPerClass < np.inf:
        tagParts.append("_" + str(maxPerClass))
    if transforms:
        for transform in transforms:
            # Use a separate local here: rebinding `name` would make the
            # figure get saved under the last transform's name instead of
            # the dataset name.
            tagParts.append('_' + str(transform.__name__))
    tag = "".join(tagParts)

    saveCurrentPlot(name, suffix=tag, subdir=tag)
def showClusters(name, X, Y, ks, lengths, clusterFactory=makeKMeans, stride=1, normSubSeqs=True):
    """Cluster sliding-window subsequences of X and plot the cluster centers.

    Draws a grid of subplots with one row per entry of ``ks`` and one
    column per entry of ``lengths``; each cell shows the centers found for
    that (k, window length) pair. The figure is saved twice via
    ``saveCurrentPlot`` (once under ``cluster`` and once under
    ``cluster/<algoName>``) and then closed.

    Args:
        name: dataset name, used in messages, the figure title and the
            saved file name.
        X: 2D data whose rows are resampled to a common length and
            z-normalized before windowing (``X.shape[1]`` is read).
        Y: unused here; kept so the signature stays unchanged.
        ks: iterable of cluster counts (one subplot row each).
        lengths: non-empty iterable of window lengths (one subplot column
            each).
        clusterFactory: callable ``(subseqs, k) -> clusterer``; the
            clusterer must provide ``fit`` and either ``cluster_centers_``
            or ``labels_``.
        stride: step forwarded to ``window.sliding_windows_of_rows``.
        normSubSeqs: when true, z-normalize each subsequence before
            clustering.
    """
    print("showing clusters for dataset %s" % name)

    plt.figure(figsize=(17, 9.5))
    plt.rcParams["font.size"] = 18

    plotRows = len(ks)
    plotCols = len(lengths)

    n = max(max(lengths), 64)  # if we want k > 64, ts must be at least this long
    n = min(n, X.shape[1])  # can't be longer than ts

    print("Resampling and normalizing...")
    X2 = zNormalizeRows(resampleToLengthN(X, n))

    print("Fitting and plotting...")
    clusterer = None  # stays None if every (k, length) cell is skipped
    plotNum = 0
    for i, k in enumerate(ks):
        for j, length in enumerate(lengths):
            # Advance the cell index even for skipped cells so the grid
            # positions of the remaining subplots stay aligned.
            plotNum += 1
            if length > n:  # window longer than whole sequence
                continue

            subseqs = window.sliding_windows_of_rows(X2, length, stride)
            if normSubSeqs:
                subseqs = zNormalizeRows(subseqs)

            clusterer = clusterFactory(subseqs, k)
            # Parenthesised single-argument print behaves the same as a
            # statement on Python 2 and as a function call on Python 3.
            print(subseqs.shape)
            print(subseqs)
            clusterer.fit(subseqs)

            if hasattr(clusterer, 'cluster_centers_'):
                # cluster centers are the actual means
                centers = clusterer.cluster_centers_
            else:
                # No centers exposed: average the members of each label.
                grouped = groupXbyY(subseqs, clusterer.labels_)
                centers = [rows.mean(axis=0) for rows in grouped]

            plt.subplot(plotRows, plotCols, plotNum)
            for center in centers:
                plt.plot(np.arange(len(center)), center)
            plt.xticks(())
            plt.yticks(())
            plt.tight_layout()

            if i == 0:
                plt.title("Window length = %d" % length)
            if j == 0:
                plt.ylabel("K = %d" % k)

    if clusterer is None:
        # Nothing was fit (empty `ks`, or every window length exceeded the
        # series length), so there is no algorithm name to title/save with.
        print("no clusters to show for dataset %s" % name)
        plt.close()
        return

    algoName = clusterer.__class__.__name__
    if normSubSeqs:
        algoName += '-Normalized'

    plt.suptitle("Clusters in %s Dataset using %s" % (name, algoName))
    plt.tight_layout()
    plt.subplots_adjust(left=.03, right=.97, bottom=.01, top=.9, wspace=.02, hspace=.01)

    # all algos for each dataset in one dir, as well as subdir for each algo
    fileName = "%s_%s.png" % (name, algoName)
    saveCurrentPlot(fileName, subdir='cluster')
    saveCurrentPlot(fileName, subdir=os.path.join('cluster', algoName))
    plt.close()