def mult_test(*args):
    """Time one dense Gram-matrix product on a random 2000 x 2000 matrix.

    Positional arguments are accepted and ignored.  Prints 'mult', draws the
    matrix entries uniformly between -1 and 1, and brackets only the product
    with tic()/toc().  Nothing is returned.
    """
    print('mult')
    shape = (2000, 2000)
    samples = np.random.uniform(-1, 1, shape)
    tic()
    # Evaluated purely for timing; the product itself is never used.
    samples.T.dot(samples)
    toc()
def _run_experiment_args(self, results_file, data_and_splits, method_results, i_labels, split):
    """Train and test one (label count, split) pair, reusing a saved result if present.

    Args:
        results_file: Results path from which the per-split temp file name
            is derived.
        data_and_splits: Object whose get_split(split, num_labels) supplies
            the data for this run.
        method_results: Not used by this method.
        i_labels: Index into self.configs.num_labels.
        split: Split index.

    Returns:
        Whatever _load_temp_split_file returns when that is truthy; otherwise
        the value returned by train_and_test on a deep copy of the configured
        learner.
    """
    label_count = self.configs.num_labels[i_labels]
    split_tag = str(label_count) + '-' + str(split)

    saved = _load_temp_split_file(results_file, label_count, split)
    if saved:
        return saved

    if mpi_utility.is_master():
        timer.tic()

    split_file = _temp_split_file_name(results_file, label_count, split)
    scratch_root = helper_functions.remove_suffix(split_file, '.pkl')
    scratch_dir = scratch_root + '/CV-temp/'
    split_data = data_and_splits.get_split(split, label_count)

    # Work on a copy so the configured learner is not modified by this split.
    base_learner = self.configs.learner
    worker = copy.deepcopy(base_learner)
    worker.split_idx_str = split_tag
    worker.temp_dir = scratch_dir
    outcome = worker.train_and_test(split_data)

    if mpi_utility.is_group_master():
        helper_functions.save_object(
            _temp_split_file_name(results_file, label_count, split), outcome)
        helper_functions.delete_dir_if_exists(scratch_root)

    # Read but never used; kept so a missing attribute fails at the same point.
    _instance_subset = base_learner.configs.instance_subset
    feature_names = base_learner.configs.results_features
    error_kind = 'is_train'

    if mpi_utility.is_group_master():
        if hasattr(worker, 'best_params'):
            report = split_tag + '-' + str(worker.best_params) + ' Error: '
            report += str(outcome.compute_error(
                self.configs.loss_function, feature_names, error_kind))
            print(report)
        else:
            print(split_tag + ' Done')

    if mpi_utility.is_master():
        timer.toc()
    return outcome
def nystrom_woodbury_laplacian(X, lamb, perc_columns, W=None, C=None, D=None, v=None):
    """Approximate inv(lamb*I + diag(X.sum(1)) - X) using factors W and C.

    The diagonal part diag(X.sum(1) + lamb) is inverted element-wise and then
    corrected with the pseudo-inverse of (-W + C' diag(...)^-1 C), i.e. a
    Woodbury-style update (presumably X is approximated by C pinv(W) C';
    confirm against `nystrom`).

    Args:
        X: Matrix whose row sums form the diagonal term.
        lamb: Shift added to the diagonal; converted with float().
        perc_columns: Passed to nystrom(X, perc_columns) when W or C is None.
        W, C: Precomputed factors; both must be given to skip `nystrom`.
        D: Not used.
        v: Optional vector.  Supplying it currently trips an assertion
            ('Make sure this works') before the product is formed.

    Returns:
        (inv_approx, vProd).  With v=None, inv_approx is the approximate
        inverse and vProd is None.
    """
    lamb = float(lamb)
    timing_test = False   # flip by hand to compare against the exact inverse
    fast_solver = True    # False selects the explicit dense-diagonal path
    if timing_test:
        tic()
    if W is None or C is None:
        W, C = nystrom(X, perc_columns)

    row_sums = X.sum(1)
    diag_inv = 1 / (row_sums + lamb)
    inv_approx = None
    vProd = None

    if not fast_solver:
        dense_diag_inv = np.diag(1 / (row_sums + lamb))
        right_factor = C.T.dot(dense_diag_inv)
        core_inv = np.linalg.pinv(-W + right_factor.dot(C))
        inv_approx = dense_diag_inv - dense_diag_inv.dot(C).dot(core_inv).dot(right_factor)
    else:
        # Broadcasting against the 1-D inverse scales columns of C.T,
        # which avoids materialising the dense diagonal matrix.
        right_factor = C.T * diag_inv
        core_inv = np.linalg.pinv(-W + right_factor.dot(C))
        if v is None:
            correction = -C.dot(core_inv).dot(right_factor)
            on_diagonal = np.diag_indices_from(correction)
            correction[on_diagonal] += 1
            inv_approx = diag_inv[:, None] * correction
        else:
            assert False, 'Make sure this works'
            projected = core_inv.dot(right_factor.dot(v))
            vProd = diag_inv * (-C.dot(projected) + v)

    if timing_test:
        toc()
        tic()
        inv_actual = np.linalg.inv(lamb * np.eye(X.shape[0]) + np.diag(row_sums) - X)
        print('Nystrom-Woodbery error: ' + str(
            norm(inv_approx - inv_actual) / norm(inv_actual)))
        toc()
    return inv_approx, vProd
def normal_test(*args):
    """Time a ridge-regression solve through the normal equations.

    Draws a 5000 x 2000 design matrix and 5000 targets uniformly between -1
    and 1, then times forming X'X + C*I and X'y and solving the resulting
    linear system.  Arguments are ignored, the solution is discarded and
    nothing is returned.
    """
    num_rows, num_cols = 5000, 2000
    design = np.random.uniform(-1, 1, (num_rows, num_cols))
    ridge = 1e-3
    targets = np.random.uniform(-1, 1, num_rows)
    tic()
    gram = design.T.dot(design) + ridge * np.eye(num_cols)
    rhs = design.T.dot(targets)
    np.linalg.solve(gram, rhs)
    toc()
def normal_test(*args):
    """Benchmark solving regularised least squares via the normal equations.

    A 5000 x 2000 matrix and a length-5000 target vector are drawn uniformly
    between -1 and 1.  Only the construction of X'X + C*I, X'y and the call
    to np.linalg.solve are timed with tic()/toc().  Arguments are ignored
    and nothing is returned.
    """
    sample_count, feature_count = 5000, 2000
    features = np.random.uniform(-1, 1, (sample_count, feature_count))
    penalty = 1e-3
    labels = np.random.uniform(-1, 1, sample_count)
    tic()
    lhs = features.T.dot(features) + penalty * np.eye(feature_count)
    rhs = features.T.dot(labels)
    np.linalg.solve(lhs, rhs)
    toc()
def cvx_test(*args):
    """Time an SCS solve of a ridge-regression problem posed through `cvx`.

    Draws a 5000 x 100 matrix and 5000 targets uniformly between -1 and 1,
    builds the objective sum of squared residuals + C * ||w||^2 with no
    constraints, and times only the solve() call.  Arguments are ignored and
    nothing is returned.
    """
    num_rows, num_cols = 5000, 100
    design = np.random.uniform(-1, 1, (num_rows, num_cols))
    ridge = 1e-3
    targets = np.random.uniform(-1, 1, num_rows)

    weights = cvx.Variable(num_cols)
    residual = design * weights - targets
    data_term = cvx.sum_entries(cvx.square(residual))
    penalty_term = cvx.norm2(weights)**2
    problem = cvx.Problem(cvx.Minimize(data_term + ridge * penalty_term), [])

    tic()
    problem.solve(solver=cvx.SCS, verbose=False)
    toc()
def cvx_test(*args):
    """Benchmark the SCS solver on an unconstrained ridge-regression problem.

    Random data (5000 x 100 inputs, 5000 targets, uniform between -1 and 1)
    is generated first; the problem minimises squared residuals plus
    C * ||w||^2 and only solve() is wrapped in tic()/toc().  Arguments are
    ignored and nothing is returned.
    """
    sample_count, feature_count = 5000, 100
    features = np.random.uniform(-1, 1, (sample_count, feature_count))
    penalty = 1e-3
    labels = np.random.uniform(-1, 1, sample_count)

    coef = cvx.Variable(feature_count)
    fit_cost = cvx.sum_entries(cvx.square(features * coef - labels))
    shrink_cost = cvx.norm2(coef)**2
    goal = cvx.Minimize(fit_cost + penalty * shrink_cost)
    task = cvx.Problem(goal, [])

    tic()
    task.solve(solver=cvx.SCS, verbose=False)
    toc()
def run_main(num_labels=None, split_idx=None, no_viz=None, configs=None, comm=None):
    """Parse command-line options, run the experiments and optionally visualise.

    Args:
        num_labels, split_idx, no_viz: When not None, each replaces the value
            parsed from the matching command-line option (-num_labels,
            -split_idx, -no_viz).
        configs: Passed to run_experiments().
        comm: Stored as configs_lib.comm.  The local name is later rebound to
            MPI.COMM_WORLD.

    When the module-level flag `test_mpi` is truthy the function only prints
    '<rank>-<num_labels>-<split_idx>' and returns.  Otherwise it stores the
    parsed arguments on configs_lib, silences DeprecationWarning, runs the
    experiments if configs_lib.run_experiments is set, and on a laptop,
    single-process run without -no_viz builds either the table or the plots.
    """
    import argparse
    import sys

    cli = argparse.ArgumentParser()
    cli.add_argument('-num_labels', type=int)
    cli.add_argument('-split_idx', type=int)
    cli.add_argument('-no_viz', action='store_true')
    arguments = cli.parse_args(sys.argv[1:])

    # Explicit keyword arguments take precedence over the command line.
    explicit = (
        ('num_labels', num_labels),
        ('split_idx', split_idx),
        ('no_viz', no_viz),
    )
    for option, value in explicit:
        if value is not None:
            setattr(arguments, option, value)

    configs_lib.comm = comm
    if test_mpi:
        from mpi4py import MPI
        rank_text = str(MPI.COMM_WORLD.Get_rank())
        print(rank_text + '-' + str(arguments.num_labels) + '-' + str(arguments.split_idx))
        return
    configs_lib.arguments = arguments

    import warnings
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    from mpi4py import MPI
    comm = MPI.COMM_WORLD
    # NOTE(review): assumes the 'Starting experiments...' branch belongs to the
    # world-size check rather than to the group-master check -- confirm.
    if comm.Get_size() <= 1:
        print('Starting experiments...')
    elif mpi_utility.is_group_master():
        print('(' + socket.gethostname() + ')''Process ' + str(comm.Get_rank()) + ': Starting experiments...')

    if mpi_utility.is_group_master():
        timer.tic()
    if configs_lib.run_experiments:
        run_experiments(configs)
    if mpi_utility.is_group_master():
        timer.toc()

    # Audible "done" signal; winsound is imported lazily, only on the laptop.
    if helper_functions.is_laptop():
        import winsound
        winsound.Beep(440, 1000)

    # Visualisation only happens on a laptop, single-process, with viz enabled.
    if not helper_functions.is_laptop():
        return
    if arguments.no_viz or MPI.COMM_WORLD.Get_size() != 1:
        return
    vis_settings = configs_lib.VisualizationConfigs()
    if vis_settings.vis_table:
        create_table()
    else:
        run_visualization()
# NOTE(review): the four statements below mirror the end of normal_test and
# look like the tail of a definition whose header is above this view --
# confirm their enclosing scope and indentation before relying on them.
A = X.T.dot(X) + C * np.eye(p)
k = X.T.dot(y)
w = np.linalg.solve(A, k)
toc()


def cvx_test(*args):
    """Time an SCS solve of a ridge-regression problem posed through `cvx`.

    Builds random 5000 x 100 data, minimises squared residuals plus
    C * ||w||^2 with no constraints, and wraps only solve() in tic()/toc().
    Arguments are ignored and nothing is returned.
    """
    n = 5000
    p = 100
    X = np.random.uniform(-1, 1, (n, p))
    C = 1e-3
    y = np.random.uniform(-1, 1, n)
    w = cvx.Variable(p)
    loss = cvx.sum_entries(cvx.square(X * w - y))
    reg = cvx.norm2(w)**2
    obj = cvx.Minimize(loss + C * reg)
    prob = cvx.Problem(obj, [])
    tic()
    prob.solve(solver=cvx.SCS, verbose=False)
    toc()


if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    # Only rank 0 starts and stops the timer; every rank runs the test.
    is_master = comm.Get_rank() == 0
    if is_master:
        tic()
    run_test()
    if is_master:
        toc()
def predict(self, data):
    """Predict for `data` by graph-smoothing a kernel estimate.

    Steps, as implemented below:
      1. Select instances: all of them, or `self.predict_sample` random ones
         (without replacement) when that is set and smaller than the number
         of source predictions.
      2. Build `W_source_pred`, a similarity graph over the selected
         `data.source_y_pred` values (RBF when `self.use_rbf`, kNN
         otherwise).  With oracle guidance, randomly chosen entries are
         overwritten using `data.true_y`.  The graph may then be masked by a
         radius graph and/or a kNN graph over the transformed inputs.
      3. Build `W` between the transformed training inputs `self.x` and the
         transformed selected inputs, and derive the smoothing matrix `S`.
      4. Solve (I + C*L) f = S.dot(self.y): through the Nystrom/Woodbury
         approximation when `self.nystrom_percentage > 0`, otherwise with a
         dense least-squares solve on the Laplacian of `W_source_pred`.
      5. If `self.predict_sample` is set, fit `self.nw_learner` on the
         selected inputs and `f`, and use its prediction for all of `data`.

    Side effects: sets `self.predict_time` and may train `self.nw_learner`.

    Returns:
        A results.Output built from `data` with `y` and `fu` set to the
        prediction.
    """
    # NOTE(review): block nesting was reconstructed from a listing with
    # collapsed whitespace -- confirm against the original file.
    y_pred_source = data.source_y_pred
    I = np.arange(y_pred_source.size)
    if self.predict_sample is not None and self.predict_sample < y_pred_source.size:
        I = np.random.choice(y_pred_source.size, self.predict_sample, replace=False)

    if self.use_rbf:
        W_source_pred = array_functions.make_rbf(y_pred_source[I], self.sigma_tr)
        if self.oracle_guidance is not None:
            y = data.true_y[I]
            n_y = y.size
            # oracle_guidance is used as the fraction of the n_y**2 entries to
            # overwrite; indices are drawn with replacement, so repeats happen.
            num_to_sample = math.ceil(self.oracle_guidance * n_y**2)
            rand_index1 = np.random.choice(n_y, int(num_to_sample), replace=True)
            rand_index2 = np.random.choice(n_y, int(num_to_sample), replace=True)
            if self.oracle_guidance_binary:
                target_distances = array_functions.make_graph_distance(y)
                distance_threshold = .2 * (y.max() - y.min())
                # Both (i, j) and (j, i) are written to keep the graph symmetric.
                W_source_pred[rand_index1, rand_index2] = target_distances[
                    rand_index1, rand_index2] <= distance_threshold
                W_source_pred[rand_index2, rand_index1] = target_distances[
                    rand_index2, rand_index1] <= distance_threshold
            else:
                # Rescale the true labels to the range of the source predictions
                # before applying the same RBF bandwidth.
                y_scaled = array_functions.normalize(y) * (
                    y_pred_source.max() - y_pred_source.min())
                W_oracle_pred = array_functions.make_rbf(y_scaled, self.sigma_tr)
                W_source_pred[rand_index1, rand_index2] = W_oracle_pred[rand_index1, rand_index2]
                W_source_pred[rand_index2, rand_index1] = W_oracle_pred[rand_index2, rand_index1]
        W = array_functions.make_rbf(
            self.transform.transform(self.x),
            self.sigma_nw,
            x2=self.transform.transform(data.x[I, :])).T
    else:
        assert self.oracle_guidance is None
        # In kNN mode the sigma parameters are fractions of the instance counts.
        k_L = int(self.sigma_tr * I.size)
        W_source_pred = array_functions.make_knn(y_pred_source[I], k_L)
        k_W = int(self.sigma_nw * self.x.shape[0])
        W = array_functions.make_knn(
            self.transform.transform(data.x[I, :]),
            k_W,
            x2=self.transform.transform(self.x))

    sparsify_prediction_graph = False
    if self.use_prediction_graph_radius:
        sparsify_prediction_graph = True
        W_sparse = array_functions.make_graph_radius(
            self.transform.transform(data.x[I, :]),
            radius=self.radius,
        )
    if self.use_prediction_graph_sparsification:
        sparsify_prediction_graph = True
        W_sparse = array_functions.make_knn(
            self.transform.transform(data.x[I, :]),
            self.k_sparsification,
            normalize_entries=False)
    if sparsify_prediction_graph:
        # Element-wise mask: only edges present in W_sparse are kept.
        W_source_pred = W_source_pred * W_sparse

    S = array_functions.make_smoothing_matrix(W)
    timing_test = False
    # Regularisation weight scaled by training-set size over total graph weight.
    C = self.C * self.x.shape[0] / W_source_pred[:].sum()

    if self.nystrom_percentage > 0 or timing_test:
        if timing_test:
            tic()
        Sy = S.dot(self.y)
        if C != 0:
            lamb = 1 / float(C)
            tic()
            inv_approx, _ = array_functions.nystrom_woodbury_laplacian(
                W_source_pred, lamb, self.nystrom_percentage)
            self.predict_time = toc()
            # inv(lamb*I + L) * lamb == inv(I + C*L) because lamb == 1/C.
            inv_approx *= lamb
            f = inv_approx.dot(Sy)
        else:
            f = Sy
        if timing_test:
            toc()

    if self.nystrom_percentage == 0 or self.nystrom_percentage is None or timing_test:
        if timing_test:
            tic()
        L = array_functions.make_laplacian_with_W(W_source_pred, normalized=False)
        A = np.eye(I.size) + C * L
        try:
            tic()
            f = np.linalg.lstsq(A, S.dot(self.y))[0]
            self.predict_time = toc()
        except Exception:
            # Best-effort fallback.  `Exception` (not a bare except) lets
            # KeyboardInterrupt/SystemExit propagate.
            print('GraphTransferNW:predict failed, returning mean')
            # Sized to the selected instances so it lines up with data.x[I, :]
            # below; identical to the full shape when nothing was sub-sampled.
            f = self.y.mean() * np.ones(data.true_y[I].shape)
        if timing_test:
            toc()
    if timing_test:
        A_inv = np.linalg.inv(A)
        print('approx error: ' + str(
            norm(inv_approx - A_inv) / norm(A_inv)))

    o = results.Output(data)
    if self.predict_sample is not None:
        # Extend the sampled solution to every instance with the NW learner.
        nw_data = data_lib.Data(data.x[I, :], f)
        self.nw_learner.train_and_test(nw_data)
        nw_output = self.nw_learner.predict(data)
        o.y = nw_output.y
        o.fu = nw_output.y
    else:
        o.y = f
        o.fu = f
    return o
continue if comm.Get_rank() == 0: timer.tic() num_labels_list = list( itertools.product(c.num_labels, range(c.num_splits))) no_viz = False pool.map(mpi_run_main_args, [n + ( no_viz, c, ) for n in num_labels_list]) pool.close() if comm.Get_rank() == 0: print 'TOTAL TIME:' timer.toc() main.run_main(configs=c) else: assert False, 'Use MPI instead!' if use_multiprocessing_pool: pool = multiprocessing_utility.LoggingPool(processes=pool_size) pool.map(launch_subprocess_args, num_labels_list) else: for i in num_labels_list: launch_subprocess_args(i) comm = MPI.COMM_WORLD if comm.Get_rank() == 0: print 'TOTAL TIME:' timer.toc() main.run_main()
# NOTE(review): the four statements below mirror the end of normal_test and
# look like the tail of a definition whose header is above this view --
# confirm their enclosing scope and indentation before relying on them.
A = X.T.dot(X) + C*np.eye(p)
k = X.T.dot(y)
w = np.linalg.solve(A, k)
toc()


def cvx_test(*args):
    """Time an SCS solve of a ridge-regression problem posed through `cvx`.

    Builds random 5000 x 100 data, minimises squared residuals plus
    C * ||w||^2 with no constraints, and wraps only solve() in tic()/toc().
    Arguments are ignored and nothing is returned.
    """
    n = 5000
    p = 100
    X = np.random.uniform(-1,1,(n,p))
    C = 1e-3
    y = np.random.uniform(-1,1, n)
    w = cvx.Variable(p)
    loss = cvx.sum_entries(cvx.square(X*w - y))
    reg = cvx.norm2(w)**2
    obj = cvx.Minimize(loss + C*reg)
    prob = cvx.Problem(obj, [])
    tic()
    prob.solve(solver=cvx.SCS, verbose=False)
    toc()


if __name__ == '__main__':
    comm = MPI.COMM_WORLD
    # Only rank 0 starts and stops the timer; every rank runs the test.
    is_master = comm.Get_rank() == 0
    if is_master:
        tic()
    run_test()
    if is_master:
        toc()
comm = MPI.COMM_WORLD for c in batch_configs.config_list: if results_exist(c): if comm.Get_rank() == 0: print 'Skipping: ' + c.results_file continue if comm.Get_rank() == 0: timer.tic() num_labels_list = list(itertools.product(c.num_labels, range(c.num_splits))) no_viz = False pool.map(mpi_run_main_args, [n + (no_viz, c, ) for n in num_labels_list]) pool.close() if comm.Get_rank() == 0: print 'TOTAL TIME:' timer.toc() main.run_main(configs=c) else: if use_multiprocessing_pool: pool = multiprocessing_utility.LoggingPool(processes=pool_size) pool.map(launch_subprocess_args, num_labels_list) else: for i in num_labels_list: launch_subprocess_args(i) comm = MPI.COMM_WORLD if comm.Get_rank() == 0: print 'TOTAL TIME:' timer.toc() main.run_main()