def experiment1(n, d, repeat, threshold):
    """
    Repeat the experiment `repeat` times, measuring the connected components
    of the thresholded correlation graph of a Gaussian random matrix.

    :param n: number of data points
    :param d: number of dimensions
    :param repeat: number of repetitions
    :param threshold: threshold on edge weights
    :return: per-repetition mean, std, min size, max size, and number of components
    """
    mu = np.zeros(repeat)
    sigma = np.zeros(repeat)
    min_size = np.zeros(repeat)
    max_size = np.zeros(repeat)
    num_groups = np.zeros(repeat)
    for x in range(repeat):
        print('epoch %d' % x)
        X = np.random.randn(n, d)
        cross_correlation = utils.xcorr(np.abs(X))
        G = graph.gen_corr_graph(cross_correlation, threshold)
        connected_comp = nx.connected_component_subgraphs(G)
        num_nodes = np.array(
            [len(conncomp.nodes()) for conncomp in connected_comp])
        min_size[x] = min(num_nodes)
        max_size[x] = max(num_nodes)
        sigma[x] = np.std(num_nodes)
        mu[x] = np.mean(num_nodes)
        num_groups[x] = len(num_nodes)
    return mu, sigma, min_size, max_size, num_groups
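# A self-contained sketch of the same measurement using only numpy and
# networkx (the simple |correlation| thresholding below stands in for this
# repo's utils.xcorr / graph.gen_corr_graph, whose exact outputs may differ):
import numpy as np
import networkx as nx


def component_stats_sketch(n=200, d=50, threshold=0.75):
    """Component-size statistics of a thresholded correlation graph."""
    X = np.random.randn(n, d)
    C = np.corrcoef(X)                       # n x n sample correlation matrix
    A = np.triu(np.abs(C) > threshold, k=1)  # strict upper triangle: edges
    G = nx.Graph()
    G.add_nodes_from(range(n))
    G.add_edges_from(zip(*np.nonzero(A)))
    sizes = np.array([len(comp) for comp in nx.connected_components(G)])
    return sizes.mean(), sizes.std(), sizes.min(), sizes.max(), len(sizes)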
def split_data(X, P, split_mode):
    """
    Split the data into P partitions for parallel processing.

    :param X: data matrix
    :param P: number of cores
    :param split_mode: 'random' or 'cross-correlation'
    :return: list of P index sequences
    """
    if split_mode == 'random':
        n = X.shape[0]
        random_seq = np.random.permutation(n)
        seq_par = [random_seq[x::P] for x in range(P)]
    elif split_mode == 'cross-correlation':
        cc, _ = utils.xcorr(np.abs(X))
        G = graph.gen_corr_graph(np.abs(cc))
        subGs = graph.split_evenly(G, P)
        seq_par = [x.nodes() for x in subGs]
    else:
        raise ValueError('unknown split_mode: %s' % split_mode)
    return seq_par
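# Quick self-contained check of the 'random' branch (the 'cross-correlation'
# branch needs the repo's utils/graph modules, so it is not exercised here):
# the P sequences should be disjoint, cover all n indices, and differ in
# size by at most one.
def _split_data_demo():
    X_demo = np.random.randn(10, 3)
    parts = split_data(X_demo, P=3, split_mode='random')
    assert sorted(i for p in parts for i in p) == list(range(10))
    print([len(p) for p in parts])  # e.g. [4, 3, 3]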
print('Parallel SGD (random split) maximum learning rate=%f' % gamma2)
print('train accuracy:%f, test accuracy:%f' %
      (accuracy(sm2.predict(X), oneHotDecode(y)),
       accuracy(sm2.predict(mnist.test.images), oneHotDecode(mnist.test.labels))))

# seq_par = sgd.split_data(X, P, 'cross-correlation')
# gamma3 = sgd.max_learning_rate(sgd.parallel_sgd, softmax_learner, 0.001, 5, 0.1,
#                                data_partition=seq_par, X=X, y=y, max_iter=20, tol=1e-10, P=P)
# print('Parallel SGD (cross-correlation split) maximum learning rate=%f' % gamma3)
# sm3, objs3, time_cost3 = sgd.parallel_sgd(softmax_learner, X, y, data_partition=seq_par,
#                                           max_iter=max_iter, gamma=gamma3 * 0.8, P=P, tol=1e-3)
# print('train accuracy:%f, test accuracy:%f' % (accuracy(sm3.predict(X), oneHotDecode(y)),
#                                                accuracy(sm3.predict(mnist.test.images), oneHotDecode(mnist.test.labels))))

max_deg = 100
cc, _ = utils.xcorr(X)
G = graph.gen_corr_graph(cc, max_deg=max_deg)
cg = graph.ConflictGraph(G)
gamma4, sm4, objs4, time_cost4 = sgd.max_learning_rate(
    sgd.parallel_sgd, softmax_learner, 0.001, 5, 0.1,
    data_partition=cg, X=X, y=y, max_iter=max_iter * max_deg, tol=1e-10, P=P)
print('Parallel SGD (CYCLADES) maximum learning rate=%f' % gamma4)
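# For context, a hedged sketch of one way a maximum-learning-rate search can
# be implemented (grow gamma until SGD stops converging, then bisect); the
# actual sgd.max_learning_rate in this repo may use a different strategy and
# signature, and `converges` below is a hypothetical callback:
def max_lr_sketch(converges, lo=0.001, grow=5.0, tol=0.1):
    hi = lo * grow
    while converges(hi):          # expand the bracket while SGD still converges
        lo, hi = hi, hi * grow
    while (hi - lo) > tol * lo:   # bisect the bracket [lo, hi)
        mid = 0.5 * (lo + hi)
        if converges(mid):
            lo = mid
        else:
            hi = mid
    return lo                     # largest rate observed to converge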
import numpy as np
from misc import utils
from SGDs import sgd, loss_functions, graph
import matplotlib.pyplot as plt
import matplotlib
import os

if __name__ == '__main__':
    n, d = 1000, 100
    np.random.seed(0)
    X = np.random.randn(n, d)
    w = np.random.uniform(low=0.0, high=1.0, size=(d,))
    y = np.dot(X, w)
    w0 = np.zeros(d)

    cc, ncc = utils.xcorr(X)
    max_degrees = range(0, n + 1, 20)
    max_degrees_actual = []
    ths = []
    num_edges = []
    per_edges_used = []
    for max_deg in max_degrees:
        G, th = graph.gen_corr_graph(np.abs(ncc), max_deg=max_deg)
        ths.append(th)
        max_degrees_actual.append(max(list(G.degree().values())))
        num_edges.append(G.number_of_edges())
        per_edges_used.append(num_edges[-1] / float(n * (n - 1) / 2))
    print(max_degrees_actual)
    print(ths)
    print(num_edges)
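# A standalone illustration of the quantity swept above: the smallest
# threshold at which the maximum node degree of the |correlation| graph
# drops to max_deg or below (a binary search over the observed correlation
# values; the repo's gen_corr_graph may choose its threshold differently):
def threshold_for_max_degree(abs_corr, max_deg):
    A = np.abs(abs_corr).copy()
    np.fill_diagonal(A, 0)
    candidates = np.unique(A)          # sorted ascending
    lo, hi = 0, len(candidates) - 1
    while lo < hi:                     # first candidate with max degree <= max_deg
        mid = (lo + hi) // 2
        if int((A > candidates[mid]).sum(axis=1).max()) <= max_deg:
            hi = mid
        else:
            lo = mid + 1
    return candidates[lo]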
""" display 6 data points and their correlation """ import numpy as np from misc import utils from SGDs import graph import os # import data from tensorflow.examples.tutorials.mnist import input_data if __name__ == '__main__': mnist = input_data.read_data_sets(os.path.join('data', 'MNIST'), one_hot=True) n = 6 X, y = mnist.train.next_batch(n) C = utils.xcorr(X) print C
import os

import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
from misc import utils
from SGDs import graph
import time

# import data
from tensorflow.examples.tutorials.mnist import input_data

if __name__ == '__main__':
    mnist_path = os.path.join('data', 'MNIST')
    mnist = input_data.read_data_sets(mnist_path)

    # only use the first n data points
    n = 1000
    img, labels = mnist.train.next_batch(n)
    cross_correlation, cc_re = utils.xcorr(img)
    # hist, bin_edges = np.histogram(cross_correlation.ravel(), bins=200)

    # histogram of the pairwise correlations
    fig = plt.figure(num=1, figsize=(20, 12))
    counts, bins, patches = plt.hist(cc_re.ravel(), bins=200, normed=1,
                                     facecolor='green', alpha=0.75)
    plt.xticks(fontsize=20)
    plt.yticks(fontsize=20)
    plt.xlabel('correlation', fontsize=20)
    plt.ylabel('frequency', fontsize=20)
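    # For comparison, a self-contained sketch (not part of the original
    # script): the same histogram for standard Gaussian data of the same
    # shape; its pairwise row correlations concentrate tightly near zero,
    # the baseline assumed by the synthetic experiments in this repo.
    fig2 = plt.figure(num=2, figsize=(20, 12))
    Z = np.random.randn(n, img.shape[1])
    Cz = np.corrcoef(Z)
    off_diag = Cz[~np.eye(Cz.shape[0], dtype=bool)]  # drop the unit diagonal
    plt.hist(off_diag, bins=200, normed=1, facecolor='blue', alpha=0.75)
    plt.xlabel('correlation', fontsize=20)
    plt.ylabel('frequency', fontsize=20)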
# for label in range(c):
#     frames = []
#     for i in range(0, len(data_by_number[label]), 100):
#         frames.append(Image.fromarray(np.uint8(255 * data_by_number[label][i].reshape((28, 28)))))
#         frames[-1].save(os.path.join('results', 'real_data', 'MNIST',
#                                      'sample_images', 'digit_%d_%i.jpg' % (label, i)))

# compute the correlation between every pair of digit classes and plot a
# histogram of the normalized cross-correlation for each pair
fig = plt.figure(num=1, figsize=(20, 12))
gs = gridspec.GridSpec(c, c)
for i in range(c):
    for j in range(c):
        print('digit %d - digit %d' % (i, j))
        cc0, ncc0 = utils.xcorr(data_by_number[i], data_by_number[j])
        ax = fig.add_subplot(gs[i, j])
        utils.plot_hist(ncc0.ravel(), ax, num_bins=200)
plt.tight_layout()
save_path = os.path.join('results', 'real_data', 'MNIST', 'correlation_500',
                         'mnist_ncc_hist_all.pdf')
fig.savefig(save_path)
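# utils.plot_hist above is repo-internal; a minimal stand-in with the same
# call shape (values, ax, num_bins), in case that helper is unavailable:
def plot_hist_sketch(values, ax, num_bins=200):
    ax.hist(values, bins=num_bins, normed=1, facecolor='green', alpha=0.75)
    ax.tick_params(labelsize=8)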
if __name__ == '__main__':
    n, d = 500, 100
    X = np.random.randn(n, d)
    cc = utils.xcorr(np.abs(X))
    c_sort = np.sort(np.abs(cc.ravel()))
    threshold = 0.75
    repeat = 10
    mu, sigma, min_size, max_size, num_groups = experiment1(
        n, d, repeat, threshold)
    print(mu)
    print(sigma)
    print(min_size)
    print(max_size)
    print(num_groups)

    # stack the per-repetition statistics column-wise
    X = np.zeros((repeat, 5))
    X[:, 0] = mu
    X[:, 1] = sigma
    X[:, 2] = min_size
    X[:, 3] = max_size
    X[:, 4] = num_groups
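    # One way to persist the assembled statistics (a sketch; the file name
    # and the use of numpy.savetxt are assumptions, not part of the
    # original script):
    np.savetxt('experiment1_stats.csv', X, delimiter=',',
               header='mu,sigma,min_size,max_size,num_groups')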