def main(tseries_fpath, assign_fpath, centroids_fpath, plot_foldpath):
    """Plot every cluster centroid together with a random sample of members.

    For each class index ``c`` in ``range(<number of distinct assignments>)``
    the folder ``<plot_foldpath>/<c>`` is created and ``plot_series`` is
    called once for the centroid (name ``'centroid'``) and once per sampled
    member (names ``'ex-0'``, ``'ex-1'``, ...). At most ten members are
    sampled per class.

    Parameters
    ----------
    tseries_fpath : path of the time-series table. The first column is
        dropped (presumably an identifier column -- confirm).
    assign_fpath : path of the per-series class assignments.
    centroids_fpath : path of the centroid table, indexed by class.
    plot_foldpath : existing folder that receives one sub-folder per class.

    Raises
    ------
    OSError
        From ``os.mkdir`` when a class sub-folder already exists.
    """
    initialize_matplotlib()

    series = np.genfromtxt(tseries_fpath)[:, 1:].copy()
    assignments = np.genfromtxt(assign_fpath)
    all_centroids = np.genfromtxt(centroids_fpath)

    for class_id in range(len(set(assignments))):
        class_dir = os.path.join(plot_foldpath, str(class_id))
        os.mkdir(class_dir)

        # NOTE(review): meaning of the trailing ``True`` flag is defined by
        # plot_series, which is not visible here.
        plot_series(all_centroids[class_id], class_dir, 'centroid', True)

        # Shuffle row positions in place, then keep the first ten.
        class_members = series[assignments == class_id]
        order = np.arange(class_members.shape[0])
        np.random.shuffle(order)

        for example_no, member in enumerate(class_members[order[:10]]):
            print(class_id, example_no)
            plot_series(member, class_dir, 'ex-%d' % example_no)
def main(features_fpath, classes_fpath, out_fpath, trans_fpath, col_to_use=2, is_text_features=False):
    """Build the per-class data and draw it as stacked bars.

    Parameters
    ----------
    features_fpath : path of the feature file; read by ``load_text_file``
        when ``is_text_features`` is true, by ``load_svm_file`` otherwise.
    classes_fpath : path of the class assignments (read with ``np.loadtxt``).
    out_fpath : forwarded to ``stacked_bars`` (presumably the output figure
        path -- confirm).
    trans_fpath : text file with two whitespace-separated integers per line;
        the first is mapped to the second.
    col_to_use : column forwarded to ``load_text_file``; unused for SVM input.
    is_text_features : selects the loader. The ``ref`` flag handed to
        ``stacked_bars`` is the negation of this value.

    Raises
    ------
    ValueError
        If a non-blank translation line has non-integer fields.
    IndexError
        If a non-blank translation line has fewer than two fields.
    """
    initialize_matplotlib()
    classes = np.loadtxt(classes_fpath)

    if is_text_features:
        to_plot, sum_classes, labels = \
            load_text_file(features_fpath, col_to_use, classes)
        ref = False
    else:
        to_plot, sum_classes, labels = \
            load_svm_file(features_fpath, classes)
        ref = True

    trans = {}
    with open(trans_fpath) as f:
        for line in f:
            spl = line.split()
            if not spl:
                # Blank (e.g. trailing) lines used to crash with IndexError.
                continue
            trans[int(spl[0])] = int(spl[1])

    data = generate_data_plot(to_plot, sum_classes, labels, classes)
    stacked_bars(labels, data, out_fpath, trans, ref)
def main(tseries_fpath, assign_fpath, centroids_fpath, plot_foldpath):
    """Plot each centroid and up to ten random member series per class.

    One sub-folder named after the class index is created under
    ``plot_foldpath``; ``plot_series`` writes the centroid plot
    (``'centroid'``) and the sampled member plots (``'ex-<i>'``) into it.

    Parameters
    ----------
    tseries_fpath : path of the time-series table. The first column is
        dropped (presumably an identifier column -- confirm).
    assign_fpath : path of the per-series class assignments.
    centroids_fpath : path of the centroid table; row ``k`` is used for
        class ``k``.
    plot_foldpath : existing folder that receives the per-class sub-folders.

    Raises
    ------
    OSError
        From ``os.mkdir`` when a class sub-folder already exists.
    """
    initialize_matplotlib()

    X = np.genfromtxt(tseries_fpath)[:, 1:].copy()
    y = np.genfromtxt(assign_fpath)
    centroids = np.genfromtxt(centroids_fpath)

    num_classes = len(set(y))

    # ``range`` rather than ``xrange``: the latter does not exist on
    # Python 3, and the loops here are tiny so the Python 2 list is harmless.
    for k in range(num_classes):
        centroid_plot_foldpath = os.path.join(plot_foldpath, str(k))
        os.mkdir(centroid_plot_foldpath)

        centroid = centroids[k]
        # NOTE(review): the trailing ``True`` is interpreted by plot_series,
        # which is defined outside this block.
        plot_series(centroid, centroid_plot_foldpath, 'centroid', True)

        members = X[y == k]
        n_samples = members.shape[0]
        sample_rows = np.arange(n_samples)
        np.random.shuffle(sample_rows)

        # Random sample of at most ten members of this class.
        members_to_plot = members[sample_rows[:10]]
        for i in range(members_to_plot.shape[0]):
            print(k, i)
            plot_series(members_to_plot[i], centroid_plot_foldpath,
                        'ex-%d' % i)
def main(tseries_fpath, k, plot_foldpath):
    """Cluster the time series with ``ksc.inc_ksc`` and save centroid plots.

    For every centroid three axis-less PDF plots are written to
    ``plot_foldpath``: the centroid as returned (``<i>.pdf``), a version
    shifted so its maximum sits at the middle index
    (``<i>-peak-center.pdf``) and a version shifted so its minimum sits at
    index 0 (``<i>-min-first.pdf``). The four arrays returned by
    ``ksc.inc_ksc`` are then saved as ``cents.dat``, ``assign.dat``,
    ``shift.dat`` and ``dists_cent.dat``.

    Parameters
    ----------
    tseries_fpath : path of the time-series table. The first column is
        dropped (presumably an identifier column -- confirm).
    k : forwarded to ``ksc.inc_ksc`` (presumably the number of clusters --
        confirm).
    plot_foldpath : folder receiving the plots and ``.dat`` files.
    """
    import mkl
    mkl.set_num_threads(16)

    initialize_matplotlib()

    def save_bare_plot(values, file_name):
        # Black line, both axes hidden, one figure per file.
        plt.plot(values, '-k')
        axes = plt.gca()
        axes.get_xaxis().set_visible(False)
        axes.get_yaxis().set_visible(False)
        plt.savefig(os.path.join(plot_foldpath, file_name))
        plt.close()

    X = np.genfromtxt(tseries_fpath)[:, 1:]
    row_totals = X.sum(axis=1)
    all_zero_rows = np.where(row_totals == 0)[0]
    # Nudge rows that sum to zero so they are no longer all-zero.
    X[all_zero_rows] += .001
    X = X.copy()

    cent, assign, shift, dists_cent = ksc.inc_ksc(X, k)

    for idx in range(cent.shape[0]):
        centroid = cent[idx]
        save_bare_plot(centroid, '%d.pdf' % idx)

        # NOTE(review): dist.shift(..., rolling=True) presumably shifts
        # circularly -- confirm against the dist module.
        middle = centroid.shape[0] // 2
        peak_centered = dist.shift(centroid, middle - np.argmax(centroid),
                                   rolling=True)
        save_bare_plot(peak_centered, '%d-peak-center.pdf' % idx)

        min_first = dist.shift(centroid, 0 - np.argmin(centroid),
                               rolling=True)
        save_bare_plot(min_first, '%d-min-first.pdf' % idx)

    outputs = (
        ('cents.dat', cent, '%.5f'),
        ('assign.dat', assign, '%d'),
        ('shift.dat', shift, '%d'),
        ('dists_cent.dat', dists_cent, '%.5f'),
    )
    for file_name, values, number_format in outputs:
        np.savetxt(os.path.join(plot_foldpath, file_name), values,
                   fmt=number_format)
def main(features_fpath, classes_fpath, out_fpath, trans_fpath, col_to_use=2, is_text_features=False):
    """Load features and classes, then render them with ``stacked_bars``.

    Parameters
    ----------
    features_fpath : feature file, parsed by ``load_text_file`` for text
        features and by ``load_svm_file`` otherwise.
    classes_fpath : class assignment file, read with ``np.loadtxt``.
    out_fpath : passed through to ``stacked_bars`` (presumably where the
        figure is written -- confirm).
    trans_fpath : translation file; each non-blank line holds two
        whitespace-separated integers, mapping the first to the second.
    col_to_use : column handed to ``load_text_file``; ignored for SVM input.
    is_text_features : chooses the loader; ``stacked_bars`` receives
        ``ref=False`` for text features and ``ref=True`` otherwise.

    Raises
    ------
    ValueError
        If a non-blank translation line contains a non-integer field.
    IndexError
        If a non-blank translation line has only one field.
    """
    initialize_matplotlib()
    classes = np.loadtxt(classes_fpath)

    if is_text_features:
        to_plot, sum_classes, labels = load_text_file(features_fpath,
                                                      col_to_use, classes)
        ref = False
    else:
        to_plot, sum_classes, labels = load_svm_file(features_fpath, classes)
        ref = True

    trans = {}
    with open(trans_fpath) as f:
        for line in f:
            spl = line.split()
            # Skip empty/whitespace-only lines instead of failing on spl[0].
            if spl:
                trans[int(spl[0])] = int(spl[1])

    data = generate_data_plot(to_plot, sum_classes, labels, classes)
    stacked_bars(labels, data, out_fpath, trans, ref)
def main(features_fpath):
    """Print summary statistics of the feature table and save a histogram.

    Prints, for every ``(name, column)`` pair in the module-level ``refs``
    mapping, the mean and standard deviation of ``UP_DATE - column`` over
    the rows where that column is positive; then the mean/std of the
    third-to-last column (labelled ``peak_frac``) and of
    ``(fourth-to-last column - UP_DATE) / 7`` (labelled ``peak_date``).
    Finally a 20-bin histogram of the ``UP_DATE`` column is written to
    ``hist.pdf`` in the current working directory.

    Parameters
    ----------
    features_fpath : path of the feature table. The first column is dropped
        (presumably an identifier column -- confirm).
    """
    import time

    initialize_matplotlib()

    X = np.genfromtxt(features_fpath)[:,1:]

    for r, k in sorted(refs.items()):
        idxs = X[:,k] > 0
        time_to_ref = (X[:,UP_DATE][idxs] - X[:,k][idxs])
        print(r, np.mean(time_to_ref), np.std(time_to_ref))

    print('peak_frac', np.mean(X[:,-3]), np.std(X[:,-3]))

    # NOTE(review): the division by 7 presumably converts days to weeks --
    # confirm the unit of these columns.
    time_to_peak = (X[:,-4] - X[:,UP_DATE]) / 7
    print('peak_date', np.mean(time_to_peak), np.std(time_to_peak))

    plt.hist(X[:,UP_DATE], bins=20)
    # Tick positions are passed to time.localtime, i.e. treated as epoch
    # seconds, and relabelled as month/year.
    ticks = plt.xticks()[0]
    plt.xticks(ticks,
               [time.strftime('%m/%y', time.localtime(x)) for x in ticks])

    # Raw string: '\#' is not a valid escape sequence in a normal literal.
    # The text passed to matplotlib is unchanged (backslash + '#').
    plt.ylabel(r'\# Videos')
    plt.xlabel('Month/Year')
    plt.savefig('hist.pdf')
def main(features_fpath):
    """Summarise the feature table on stdout and plot upload dates.

    For each ``(name, column)`` entry of the module-level ``refs`` mapping
    (visited in sorted order) the mean and standard deviation of
    ``UP_DATE - column`` are printed, restricted to rows where the column is
    positive. The same two statistics are printed for the third-to-last
    column (``peak_frac``) and for ``(fourth-to-last column - UP_DATE) / 7``
    (``peak_date``). A 20-bin histogram of the ``UP_DATE`` column is saved
    as ``hist.pdf`` in the current working directory.

    Parameters
    ----------
    features_fpath : path of the feature table. The first column is dropped
        (presumably an identifier column -- confirm).
    """
    import time

    initialize_matplotlib()

    X = np.genfromtxt(features_fpath)[:, 1:]

    for r, k in sorted(refs.items()):
        idxs = X[:, k] > 0
        time_to_ref = (X[:, UP_DATE][idxs] - X[:, k][idxs])
        print(r, np.mean(time_to_ref), np.std(time_to_ref))

    print('peak_frac', np.mean(X[:, -3]), np.std(X[:, -3]))

    # NOTE(review): "/ 7" looks like a days-to-weeks conversion -- confirm.
    time_to_peak = (X[:, -4] - X[:, UP_DATE]) / 7
    print('peak_date', np.mean(time_to_peak), np.std(time_to_peak))

    plt.hist(X[:, UP_DATE], bins=20)
    # Only the tick positions are needed; they are formatted as local
    # month/year via time.localtime (so they are treated as epoch seconds).
    ticks, _ = plt.xticks()
    tick_labels = [time.strftime('%m/%y', time.localtime(x)) for x in ticks]
    plt.xticks(ticks, tick_labels)

    # r'...' keeps the literal backslash without relying on the deprecated
    # "unknown escape is kept verbatim" behaviour of '\#'.
    plt.ylabel(r'\# Videos')
    plt.xlabel('Month/Year')
    plt.savefig('hist.pdf')
def main(features_fpath, classes_fpath, user_users=False):
    """Print a tab-separated table of ``asym_jaccard`` values between classes.

    The first output line is an empty cell followed by the class indices.
    Each following line starts with class index ``j`` and then holds
    ``asym_jaccard(to_compare[i], to_compare[j])`` for every class ``i``,
    formatted with three decimals. Every line ends with a tab before the
    newline.

    Parameters
    ----------
    features_fpath : feature file handed to ``load_text_file``.
    classes_fpath : class assignment file, read with ``np.loadtxt``.
    user_users : flag forwarded unchanged to ``load_text_file``.
    """
    initialize_matplotlib()

    classes = np.loadtxt(classes_fpath)
    class_ids = range(len(set(classes)))

    to_compare = load_text_file(features_fpath, classes, user_users)

    # Header row: leading empty cell, then one column per class.
    print('', *class_ids, sep='\t', end='\t\n')

    for row_class in class_ids:
        cells = [
            '%.3f' % asym_jaccard(to_compare[col_class], to_compare[row_class])
            for col_class in class_ids
        ]
        print(row_class, *cells, sep='\t', end='\t\n')
def main(features_fpath, classes_fpath, user_users=False):
    """Print the pairwise ``asym_jaccard`` table of the classes to stdout.

    Output is tab-separated: a header line with the class indices, then one
    line per class ``j`` containing ``asym_jaccard(to_compare[i],
    to_compare[j])`` for every class ``i`` with three decimals.

    Parameters
    ----------
    features_fpath : feature file handed to ``load_text_file``.
    classes_fpath : class assignment file, read with ``np.loadtxt``.
    user_users : flag forwarded unchanged to ``load_text_file``.
    """
    initialize_matplotlib()

    classes = np.loadtxt(classes_fpath)
    num_classes = len(set(classes))

    to_compare = load_text_file(features_fpath, classes, user_users)

    # ``range`` replaces ``xrange``: this block already relies on the
    # print() function, and ``xrange`` is undefined on Python 3.
    print(end='\t')
    for i in range(num_classes):
        print(i, end='\t')
    print()

    for j in range(num_classes):
        print(j, end='\t')
        for i in range(num_classes):
            first_set = to_compare[i]
            second_set = to_compare[j]

            # The measure is asymmetric, so argument order matters:
            # column class first, row class second.
            asym_j = asym_jaccard(first_set, second_set)
            print('%.3f' % asym_j, end='\t')
        print()
def main(tseries_fpath, plot_foldpath):
    """Evaluate clusterings for 2..15 clusters and plot quality measures.

    Five times a random subset of at most 200 series is drawn and its
    pairwise distance matrix computed; on each subset ``run_clustering`` is
    repeated five times for every cluster count, giving 25 measurements per
    cluster count. Means with ``hci(..., 0.95)`` error bars are written to
    ``bcv.pdf`` (inter-cluster, BetaCV, intra-cluster) and ``cost.pdf``
    (cost) inside ``plot_foldpath``.

    Parameters
    ----------
    tseries_fpath : path of the time-series table. The first column is
        dropped (presumably an identifier column -- confirm).
    plot_foldpath : existing folder that receives the two PDF files.

    Raises
    ------
    AssertionError
        If ``plot_foldpath`` is not a directory.
    """
    assert os.path.isdir(plot_foldpath)
    initialize_matplotlib()

    X = np.genfromtxt(tseries_fpath)[:, 1:].copy()

    n_samples = X.shape[0]
    sample_rows = np.arange(n_samples)

    # A real list on both Python 2 and 3, so it can be handed to matplotlib.
    clust_range = list(range(2, 16))
    n_clustering_vals = len(clust_range)

    n_resamples = 5
    n_repeats = 5
    n_runs = n_resamples * n_repeats

    intra_array = np.zeros(shape=(n_runs, n_clustering_vals))
    inter_array = np.zeros(shape=(n_runs, n_clustering_vals))
    bcvs_array = np.zeros(shape=(n_runs, n_clustering_vals))
    costs_array = np.zeros(shape=(n_runs, n_clustering_vals))

    r = 0
    # ``range`` instead of the Python-2-only ``xrange``.
    for _ in range(n_resamples):
        np.random.shuffle(sample_rows)
        rand_sample = sample_rows[:200]

        X_new = X[rand_sample]
        D_new = dist.dist_all(X_new, X_new, rolling=True)[0]

        for _ in range(n_repeats):
            # Column index comes from enumerate rather than ``k - 2`` so it
            # cannot drift from the start of clust_range.
            for col, k in enumerate(clust_range):
                intra, inter, bcv, cost = run_clustering(X_new, k, D_new)

                intra_array[r, col] = intra
                inter_array[r, col] = inter
                bcvs_array[r, col] = bcv
                costs_array[r, col] = cost

            r += 1
            print(r)

    intra_err = np.zeros(n_clustering_vals)
    inter_err = np.zeros(n_clustering_vals)
    bcvs_err = np.zeros(n_clustering_vals)
    costs_err = np.zeros(n_clustering_vals)

    # NOTE(review): hci presumably returns the half-width of the 95%
    # confidence interval -- confirm against its definition.
    for j in range(n_clustering_vals):
        intra_err[j] = hci(intra_array[:, j], 0.95)
        inter_err[j] = hci(inter_array[:, j], 0.95)
        bcvs_err[j] = hci(bcvs_array[:, j], 0.95)
        costs_err[j] = hci(costs_array[:, j], 0.95)

    plt.errorbar(clust_range, np.mean(inter_array, axis=0), fmt="gD",
                 label="Inter Cluster", yerr=inter_err)
    plt.errorbar(clust_range, np.mean(bcvs_array, axis=0), fmt="bo",
                 label="BetaCV", yerr=bcvs_err)
    plt.errorbar(clust_range, np.mean(intra_array, axis=0), fmt="rs",
                 label="Intra Cluster", yerr=intra_err)
    plt.ylabel("Average Distance")
    plt.xlabel("Number of clusters")
    plt.xlim((0.0, 16))
    plt.ylim((0.0, 1.0))
    plt.legend(frameon=False, loc="lower left")
    plt.savefig(os.path.join(plot_foldpath, "bcv.pdf"))
    plt.close()

    plt.errorbar(clust_range, np.mean(costs_array, axis=0), fmt="bo",
                 label="Cost", yerr=costs_err)
    plt.ylabel("Cost (F)")
    plt.xlabel("Number of clusters")
    plt.xlim((0.0, 16))
    plt.ylim((0.0, 1.0))
    plt.legend(frameon=False, loc="lower left")
    plt.savefig(os.path.join(plot_foldpath, "cost.pdf"))
    plt.close()
def main(tseries_fpath, plot_foldpath):
    """Run repeated clusterings for 2..15 clusters and plot their quality.

    Five random subsets of at most 200 series are drawn; for each one the
    pairwise distance matrix is computed once and ``run_clustering`` is
    repeated five times per cluster count (25 measurements per cluster
    count in total). The per-cluster-count means, with ``hci(..., .95)``
    error bars, are saved to ``bcv.pdf`` (inter-cluster, BetaCV and
    intra-cluster distances) and ``cost.pdf`` (cost) in ``plot_foldpath``.

    Parameters
    ----------
    tseries_fpath : path of the time-series table. The first column is
        dropped (presumably an identifier column -- confirm).
    plot_foldpath : existing folder that receives the two PDF files.

    Raises
    ------
    AssertionError
        If ``plot_foldpath`` is not a directory.
    """
    assert os.path.isdir(plot_foldpath)
    initialize_matplotlib()

    X = np.genfromtxt(tseries_fpath)[:, 1:].copy()

    n_samples = X.shape[0]
    sample_rows = np.arange(n_samples)

    # Materialised so matplotlib receives a plain sequence on Python 3 too.
    clust_range = list(range(2, 16))
    n_clustering_vals = len(clust_range)

    n_resamples = 5
    n_repeats = 5
    n_runs = n_resamples * n_repeats

    intra_array = np.zeros(shape=(n_runs, n_clustering_vals))
    inter_array = np.zeros(shape=(n_runs, n_clustering_vals))
    bcvs_array = np.zeros(shape=(n_runs, n_clustering_vals))
    costs_array = np.zeros(shape=(n_runs, n_clustering_vals))

    r = 0
    # ``xrange`` was replaced by ``range`` (``xrange`` is gone in Python 3).
    for _ in range(n_resamples):
        np.random.shuffle(sample_rows)
        rand_sample = sample_rows[:200]

        X_new = X[rand_sample]
        D_new = dist.dist_all(X_new, X_new, rolling=True)[0]

        for _ in range(n_repeats):
            # enumerate supplies the column, replacing the implicit
            # ``k - 2`` tie to the first value of clust_range.
            for col, k in enumerate(clust_range):
                intra, inter, bcv, cost = run_clustering(X_new, k, D_new)

                intra_array[r, col] = intra
                inter_array[r, col] = inter
                bcvs_array[r, col] = bcv
                costs_array[r, col] = cost

            r += 1
            print(r)

    intra_err = np.zeros(n_clustering_vals)
    inter_err = np.zeros(n_clustering_vals)
    bcvs_err = np.zeros(n_clustering_vals)
    costs_err = np.zeros(n_clustering_vals)

    # NOTE(review): hci is assumed to give the half-width of the 95%
    # confidence interval -- confirm against its definition.
    for j in range(n_clustering_vals):
        intra_err[j] = hci(intra_array[:, j], .95)
        inter_err[j] = hci(inter_array[:, j], .95)
        bcvs_err[j] = hci(bcvs_array[:, j], .95)
        costs_err[j] = hci(costs_array[:, j], .95)

    plt.errorbar(clust_range, np.mean(inter_array, axis=0), fmt='gD',
                 label='Inter Cluster', yerr=inter_err)
    plt.errorbar(clust_range, np.mean(bcvs_array, axis=0), fmt='bo',
                 label='BetaCV', yerr=bcvs_err)
    plt.errorbar(clust_range, np.mean(intra_array, axis=0), fmt='rs',
                 label='Intra Cluster', yerr=intra_err)
    plt.ylabel('Average Distance')
    plt.xlabel('Number of clusters')
    plt.xlim((0., 16))
    plt.ylim((0., 1.))
    plt.legend(frameon=False, loc='lower left')
    plt.savefig(os.path.join(plot_foldpath, 'bcv.pdf'))
    plt.close()

    plt.errorbar(clust_range, np.mean(costs_array, axis=0), fmt='bo',
                 label='Cost', yerr=costs_err)
    plt.ylabel('Cost (F)')
    plt.xlabel('Number of clusters')
    plt.xlim((0., 16))
    plt.ylim((0., 1.))
    plt.legend(frameon=False, loc='lower left')
    plt.savefig(os.path.join(plot_foldpath, 'cost.pdf'))
    plt.close()