import os
import gzip

import dill
import numpy as np
import matplotlib.pyplot as plt
import multiprocessing as mp

from copy import deepcopy
from dotmap import DotMap
from multiprocessing import Process, Pipe

# This script targets Python 2 (it uses xrange). NeuralNetwork (providing
# calc_forward, calc_backprop, get_trained_network and set_hidden_function)
# is assumed to be defined or imported elsewhere; it is not part of this listing.


def simple_gradient_descent_1_hidden_layer_eta_checker():
    def get_random_bws(nl):
        # One (m+1, n) weight matrix (incl. bias row) per layer pair, uniformly
        # initialized in [-1/sqrt(n), 1/sqrt(n)], packed into an object array.
        l = [np.random.uniform(-1./np.sqrt(n), 1./np.sqrt(n), (m+1, n))
             for m, n in zip(nl[:-1], nl[1:])]
        larray = np.empty(len(l), dtype=object)
        larray[:] = l
        return larray

    get_random_matrix = lambda shape: np.random.random(shape)*2-1

    def split_list(l, chunks):
        # Split the list l into `chunks` contiguous parts whose lengths differ
        # by at most one (the first length%chunks parts get one extra element).
        assert isinstance(l, list)
        length = len(l)
        assert 0 < length
        assert 0 < chunks <= length
        n = length//chunks
        n1 = n+1
        len1 = (length%chunks)*n1
        return [l[i:i+n1] for i in xrange(0, len1, n1)]+\
               [l[i:i+n] for i in xrange(len1, length, n)]

    # class TestCases(unittest.TestCase):
    #     def testOne(self):
    #         l = [1,2,3,4,5,6,7,8,9,10]
    #         l3 = [[1,2,3,4], [5,6,7], [8,9,10]]
    #         self.failUnless(l3 == split_list(l, 3))
    # unittest.main()

    def get_plots(nns, title, file_path_name):
        # One column per network: errors (top row) and etas (bottom row),
        # with shared y-limits across all columns for comparability.
        fig, arr = plt.subplots(2, len(nns), figsize=(len(nns)*4, 11))
        plt.suptitle(title, fontsize=16)
        # arr[0, 0].set_title("Best Errors")
        # arr[1, 0].set_title("Best Etas")
        all_errors_min = []
        all_errors_max = []
        all_etas_min = []
        all_etas_max = []
        for nn in nns:
            arrays = (nn.errors_train, nn.errors_valid, nn.errors_test)
            all_errors_min.append(np.min(arrays))
            all_errors_max.append(np.max(arrays))
            all_etas_min.append(np.min(nn.etas))
            all_etas_max.append(np.max(nn.etas))
        error_min = np.min(all_errors_min)
        error_max = np.max(all_errors_max)
        eta_min = np.min(all_etas_min)
        eta_max = np.max(all_etas_max)
        for i, nn in enumerate(nns):
            alpha_str = "{:4.2f}".format(nn.alpha)
            nl_str = "_".join(list(map(str, nn.nl)))
            arr[0, i].set_title("Errors, alpha: {}\nnl: {}".format(alpha_str, nl_str))
            arr[0, i].set_ylim(error_min, error_max)
            arr[0, i].set_yscale("log", nonposy='clip')
            p_train = arr[0, i].plot(nn.errors_train, "b-")[0]
            p_train.set_label("train")
            p_valid = arr[0, i].plot(nn.errors_valid, "g-")[0]
            p_valid.set_label("valid")
            p_test = arr[0, i].plot(nn.errors_test, "r-")[0]
            p_test.set_label("test")
            arr[0, i].legend()
            arr[1, i].set_title("Eta, alpha: {}\nnl: {}".format(alpha_str, nl_str))
            # arr[1, i].set_title("Eta, alpha: {}".format(alpha_str))
            arr[1, i].set_ylim(eta_min, eta_max)
            arr[1, i].set_yscale("log", nonposy='clip')
            p_eta = arr[1, i].plot(nn.etas, "b.")[0]
            p_eta.set_label("eta")
            arr[1, i].legend()
        plt.subplots_adjust(left=0.03, bottom=0.05, right=0.98, top=0.90,
                            wspace=0.1, hspace=0.18)
        plt.savefig(file_path_name)
        # plt.show()

    def worker_thread(pipe_in, pipe_out):
        # Worker process: receives a chunk of network file paths, trains each
        # network and writes it back to disk. (pipe_out is currently unused.)
        proc_nr, nn_file_paths, data_values = pipe_in.recv()
        print("start proc_nr #{}".format(proc_nr))
        for i, nn_file_path in enumerate(nn_file_paths):
            with gzip.GzipFile(nn_file_path, "rb") as f:
                nn = dill.load(f)
            print("proc_nr #{}: nn #{} with {} hid func".format(proc_nr, i, nn.f_hidden_func_str))
            nn.get_trained_network(data_values)
            with gzip.GzipFile(nn_file_path, "wb") as f:
                dill.dump(nn, f)
        print("finish proc_nr #{}".format(proc_nr))

    home = os.path.expanduser("~")
    full_path_networks_folder = home+"/Documents/networks_gradient_descent"
    path_network = full_path_networks_folder+"/networks"
    path_pictures = full_path_networks_folder+"/pictures"
    path_data = full_path_networks_folder+"/data"
    if not os.path.exists(path_network):
        os.makedirs(path_network)
    if not os.path.exists(path_pictures):
        os.makedirs(path_pictures)
    if not os.path.exists(path_data):
        os.makedirs(path_data)

    m = 300
    k = 15
    n = 9
    nl = [k, 30, 20, 20, n]
    eta_start = 0.05
    cpu_amount = mp.cpu_count()

    # bws1 = get_random_bws(nl)
    # T_train = nn.calc_forward(X_train, bws1)
    # T_valid = nn.calc_forward(X_valid, bws1)
    # T_test = nn.calc_forward(X_test, bws1)

    file_path_data = path_data+"/data_set_1.pkl.gz"
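    # The data set is generated once and cached on disk: the targets are a
    # random linear map of the inputs thresholded at 0, so each of the n
    # outputs is a linearly separable binary label with roughly balanced
    # classes (inputs and M are symmetric around 0).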
    if not os.path.exists(file_path_data):
        X_train = get_random_matrix((m, k))
        X_valid = get_random_matrix((int(m*0.6), k))
        X_test = get_random_matrix((int(m*0.6), k))
        M = get_random_matrix((k, n))
        T_train = X_train.dot(M)
        T_valid = X_valid.dot(M)
        T_test = X_test.dot(M)
        T_train[T_train>=0.] = 1.
        T_train[T_train<0.] = 0.
        T_valid[T_valid>=0.] = 1.
        T_valid[T_valid<0.] = 0.
        T_test[T_test>=0.] = 1.
        T_test[T_test<0.] = 0.
        data_values = DotMap()
        data_values.X_train = X_train
        data_values.T_train = T_train
        data_values.X_valid = X_valid
        data_values.T_valid = T_valid
        data_values.X_test = X_test
        data_values.T_test = T_test
        with gzip.GzipFile(file_path_data, "wb") as f:
            dill.dump(data_values, f)
    else:
        with gzip.GzipFile(file_path_data, "rb") as f:
            data_values = dill.load(f)

    # All networks start from the same state: bws_prev holds the initial
    # weights, bws the weights after one plain gradient step with eta_start.
    bws = get_random_bws(nl)
    nn = NeuralNetwork()
    bws_prev = bws
    bwsd_prev = nn.calc_backprop(data_values.X_train, bws, data_values.T_train)
    bws = bws-eta_start*bwsd_prev

    alphas = np.arange(0, 8)*0.1
    # alphas = np.arange(0, cpu_amount)*0.1
    alphas_str = list(map(lambda x: "{:4.2f}".format(x), alphas))
    alphas_str_under = list(map(lambda x: x.replace(".", "_"), alphas_str))
    iterations = 50

    # Build one network per (activation function, alpha) pair and dump each
    # one to its own file so the worker processes can pick them up.
    func_strs = ["sig", "tanh", "relu"]
    nn_file_paths_all = []
    nn_file_paths_combined = []
    for func_str in func_strs:
        # print("")
        nns = [NeuralNetwork(eta=eta_start) for _ in xrange(0, len(alphas))]
        for nn, alpha in zip(nns, alphas):
            nn.nl = deepcopy(nl)
            nn.bws = deepcopy(bws)
            nn.bws_prev = deepcopy(bws_prev)
            nn.alpha = alpha
            nn.iterations = iterations
            nn.set_hidden_function(func_str)
        nn_file_paths = [path_network+"/network_nr_{}_func_{}.pkl.gz".format(i, func_str)
                         for i in xrange(0, len(nns))]
        for nn, nn_file_path in zip(nns, nn_file_paths):
            with gzip.GzipFile(nn_file_path, "wb") as f:
                dill.dump(nn, f)
        nn_file_paths_all.extend(nn_file_paths)
        nn_file_paths_combined.append((func_str, nn_file_paths))

    # Split nn_file_paths_all into cpu_amount nearly equal lists; shuffle
    # first so every process gets a mix of activation functions (this assumes
    # cpu_amount <= number of networks, see the asserts in split_list).
    # l = np.arange(0, 10).tolist()
    # l_chunks = split_list(l, 3)
    # print("l: {}".format(l))
    # print("l_chunks: {}".format(l_chunks))
    # sys.exit(0)
    nn_file_paths_mixed = np.array(nn_file_paths_all)[
        np.random.permutation(np.arange(0, len(nn_file_paths_all)))].tolist()
    # print("nn_file_paths_mixed:\n{}".format(nn_file_paths_mixed))
    nn_file_paths_chunks = split_list(nn_file_paths_mixed, cpu_amount)

    pipes_main_threads = [Pipe() for _ in xrange(0, cpu_amount)]
    pipes_threads_main = [Pipe() for _ in xrange(0, cpu_amount)]
    thread_pipes_out, main_pipes_in = list(zip(*pipes_main_threads))
    main_pipes_out, thread_pipes_in = list(zip(*pipes_threads_main))

    procs = [Process(target=worker_thread, args=(pipe_in, pipe_out))
             for pipe_in, pipe_out in zip(thread_pipes_in, thread_pipes_out)]
    for proc in procs:
        proc.start()
    for i, (main_pipe_out, nn_file_paths) in enumerate(zip(main_pipes_out, nn_file_paths_chunks)):
        main_pipe_out.send((i, nn_file_paths, data_values))
    for proc in procs:
        proc.join()
    print("")

    # Reload the trained networks per activation function and plot them.
    title_template = "With {} hidden activation function"
    for func_str, nn_file_paths in nn_file_paths_combined:
        nns = []
        for nn_file_path in nn_file_paths:
            with gzip.GzipFile(nn_file_path, "rb") as f:
                nn = dill.load(f)
            nns.append(nn)
        get_plots(nns, title_template.format(func_str),
                  path_pictures+"/{}_alphas.png".format(func_str))
        print("finish plot with {} function".format(func_str))
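
# Minimal entry point (an assumption, not part of the original listing): run
# the eta/alpha sweep when this file is executed as a script.
if __name__ == "__main__":
    simple_gradient_descent_1_hidden_layer_eta_checker()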