def single_run_single_gate(params):
    """Train a model from a single initial gate and save the run artifacts.

    The steps are: split the data, fit and apply the transformer, normalize,
    initialize candidate gates, take the first gate tree from them, train a
    model on it, then write the model weights, the tracker, a plot of the
    final gates and the parameters under ``params['save_dir']``.

    Args:
        params: Mapping of run settings. Keys read directly here are
            ``'save_dir'``, ``'data_params'``, ``'random_seed'``,
            ``'transform_params'`` (``'cells_to_subsample'`` and
            ``'num_cells_for_transformer'``), ``'model_params'`` and
            ``'train_params'``. The whole mapping is also forwarded to the
            gate-initialization helpers.

    Returns:
        A ``(performance_tracker, model)`` tuple, where the tracker is
        whatever ``run_train_model`` returned.
    """
    t_begin = time.time()
    out_dir = params['save_dir']
    transform_cfg = params['transform_params']

    # NOTE: set_random_seeds(params) is deliberately left disabled for now so
    # results stay comparable with earlier runs; enable it eventually.

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'params.pkl'), 'wb') as params_file:
        pickle.dump(params, params_file)

    data_input = DataInput(params['data_params'])
    data_input.split_data(split_seed=params['random_seed'])

    transformer = DataTransformerFactory(
        transform_cfg, params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        transformer,
        cells_to_subsample=transform_cfg['cells_to_subsample'],
        num_cells_for_transformer=transform_cfg['num_cells_for_transformer'],
    )
    data_input.save_transformer(out_dir)
    data_input.normalize_data()

    candidate_gates = init_plot_and_save_gates(data_input, params)

    # Everything below differs from the other main_UMAP script.
    data_input.convert_all_data_to_tensors()
    first_gate_tree, _ = get_next_gate_tree(
        candidate_gates, data_input, params, model=None)
    model = initialize_model(params['model_params'], [first_gate_tree])
    performance_tracker = run_train_model(
        model, params['train_params'], data_input)

    torch.save(model.state_dict(), os.path.join(out_dir, 'model.pkl'))
    with open(os.path.join(out_dir, 'last_CV_rounds_tracker.pkl'),
              'wb') as tracker_file:
        pickle.dump(performance_tracker, tracker_file)

    plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    # Repeat each training sample's label once per row of that sample so the
    # stacked labels line up with the concatenated training data.
    labels_per_sample = [
        data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
        for idx, sample in enumerate(data_input.x_tr)
    ]
    plotter.plot_data_with_gates(np.array(np.concatenate(labels_per_sample)))
    plt.savefig(os.path.join(out_dir, 'final_gates.png'))

    with open(os.path.join(out_dir, 'configs.pkl'), 'wb') as configs_file:
        pickle.dump(params, configs_file)

    print('Complete main loop took %.4f seconds' % (time.time() - t_begin))
    return performance_tracker, model
def main(path_to_params):
    """Parse a parameter file, train one model and save the results.

    The parameters are parsed with ``TransformParameterParser``, the data is
    split, embedded and normalized, gates are initialized (and plotted/saved
    by ``init_plot_and_save_gates``), and a model built from them is trained.
    The model weights, the tracker and a plot of the final gates are written
    under ``params['save_dir']``.

    Args:
        path_to_params: Path handed to ``TransformParameterParser``.

    Returns:
        None.
    """
    t_begin = time.time()
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)

    out_dir = params['save_dir']
    transform_cfg = params['transform_params']

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'params.pkl'), 'wb') as params_file:
        pickle.dump(params, params_file)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    transformer = DataTransformerFactory(
        transform_cfg, params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        transformer,
        transform_cfg['cells_to_subsample'],
        transform_cfg['num_cells_for_transformer'],
    )
    data_input.save_transformer(out_dir)
    data_input.normalize_data()

    gate_tree = init_plot_and_save_gates(data_input, params)
    model = initialize_model(params['model_params'], gate_tree)
    data_input.prepare_data_for_training()
    performance_tracker = run_train_model(
        model, params['train_params'], data_input)

    torch.save(model.state_dict(), os.path.join(out_dir, 'model.pkl'))
    with open(os.path.join(out_dir, 'tracker.pkl'), 'wb') as tracker_file:
        pickle.dump(performance_tracker, tracker_file)

    plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    # Repeat each training sample's label once per row of that sample so the
    # stacked labels line up with the concatenated training data.
    labels_per_sample = [
        data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
        for idx, sample in enumerate(data_input.x_tr)
    ]
    plotter.plot_data_with_gates(np.array(np.concatenate(labels_per_sample)))
    plt.savefig(os.path.join(out_dir, 'final_gates.png'))

    print('Complete main loop took %.4f seconds' % (time.time() - t_begin))
def cross_validate(path_to_params, n_runs, start_seed=0):
    """Repeat the single-gate training pipeline over several data splits.

    For every run index in ``range(start_seed, n_runs)`` the data is
    re-split, embedded, normalized, a model is trained from the first gate
    tree, and the per-run artifacts (transformer, model weights, tracker,
    final-gates plot, params) are written to ``<save_dir>/run<index>``.
    The last train/test accuracy of every run is collected and the mean and
    standard deviation are printed at the end.

    Args:
        path_to_params: Path handed to ``TransformParameterParser``.
        n_runs: Exclusive upper bound of the run indices; runs
            ``start_seed .. n_runs - 1`` are executed.
        start_seed: Index of the first run. ``split_data()`` is called this
            many times beforehand so the first executed run sees the split
            it would have seen in an uninterrupted sequence.

    Returns:
        None. Results are printed and written to disk.
    """
    start_time = time.time()
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)
    set_random_seeds(params)

    save_dir = params['save_dir']
    transform_params = params['transform_params']

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    te_accs = []
    tr_accs = []

    # Advance the splitter to the split that run `start_seed` should use.
    for _ in range(start_seed):
        data_input.split_data()

    for run in range(start_seed, n_runs):
        savepath = os.path.join(save_dir, 'run%d' % run)
        os.makedirs(savepath, exist_ok=True)

        data_input.split_data()
        print(data_input.idxs_tr)

        data_transformer = DataTransformerFactory(
            transform_params, params['random_seed']).manufacture_transformer()
        data_input.embed_data_and_fit_transformer(
            data_transformer,
            cells_to_subsample=transform_params['cells_to_subsample'],
            num_cells_for_transformer=transform_params[
                'num_cells_for_transformer'],
            use_labels_to_transform_data=transform_params[
                'use_labels_to_transform_data'],
        )
        data_input.save_transformer(savepath)
        data_input.normalize_data()

        unused_cluster_gate_inits = init_plot_and_save_gates(
            data_input, params)

        # Everything below differs from the other main_UMAP script.
        data_input.convert_all_data_to_tensors()
        init_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(
            unused_cluster_gate_inits, data_input, params, model=None)
        model = initialize_model(params['model_params'], [init_gate_tree])
        performance_tracker = run_train_model(
            model, params['train_params'], data_input)

        torch.save(model.state_dict(), os.path.join(savepath, 'model.pkl'))
        with open(os.path.join(savepath, 'tracker.pkl'), 'wb') as f:
            pickle.dump(performance_tracker, f)

        results_plotter = DataAndGatesPlotterDepthOne(
            model, np.concatenate(data_input.x_tr))
        # Repeat each training sample's label once per row of that sample so
        # the stacked labels line up with the concatenated training data.
        labels = np.array(np.concatenate([
            data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
            for idx, sample in enumerate(data_input.x_tr)
        ]))
        results_plotter.plot_data_with_gates(labels)
        plt.savefig(os.path.join(savepath, 'final_gates.png'))
        # pyplot keeps every figure alive until it is closed; without this
        # the figures of all runs pile up in memory.
        plt.close('all')

        with open(os.path.join(savepath, 'configs.pkl'), 'wb') as f:
            pickle.dump(params, f)

        print('Complete main loop for run %d took %.4f seconds' %
              (run, time.time() - start_time))
        # Restart the clock so the next message times only the next run.
        start_time = time.time()

        print('Accuracy tr %.3f, te %.3f' %
              (performance_tracker.metrics['tr_acc'][-1],
               performance_tracker.metrics['te_acc'][-1]))
        te_accs.append(performance_tracker.metrics['te_acc'][-1])
        tr_accs.append(performance_tracker.metrics['tr_acc'][-1])

    if not te_accs:
        # Nothing ran (n_runs <= start_seed); mean/std of an empty array
        # would only yield NaN and a RuntimeWarning.
        print('No cross-validation runs were executed '
              '(start_seed=%d, n_runs=%d)' % (start_seed, n_runs))
        return

    tr_accs = np.array(tr_accs)
    te_accs = np.array(te_accs)
    print('Average tr acc: %.3f, te acc %.3f' %
          (np.mean(tr_accs), np.mean(te_accs)))
    print('Std dev tr acc: %.3f, te_acc %.3f' %
          (np.std(tr_accs), np.std(te_accs)))
def main(path_to_params):
    """Train a model initialized with all potential gates and save results.

    The parameters are parsed, the data is split, embedded and normalized,
    and a model is built from every gate returned by
    ``get_all_potential_gates``. If ``train_params['fix_gates']`` is set the
    gate parameters are frozen before training. The model weights, tracker,
    final-gate figures and parameters are written under
    ``params['save_dir']``.

    Args:
        path_to_params: Path handed to ``TransformParameterParser``.

    Returns:
        None.
    """
    start_time = time.time()
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)
    set_random_seeds(params)

    save_dir = params['save_dir']
    transform_params = params['transform_params']

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' % len(data_input.x_tr))

    data_transformer = DataTransformerFactory(
        transform_params, params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        data_transformer,
        cells_to_subsample=transform_params['cells_to_subsample'],
        num_cells_for_transformer=transform_params[
            'num_cells_for_transformer'],
        use_labels_to_transform_data=transform_params[
            'use_labels_to_transform_data'],
    )

    # openTSNE objects can't be pickled, so skip saving a t-SNE transformer.
    # The check must look at the configured transform type: comparing the
    # whole transform_params mapping to 'tsne' (as before) is never true.
    if transform_params.get('transform_type') != 'tsne':
        data_input.save_transformer(save_dir)
    data_input.normalize_data()

    potential_gates = get_all_potential_gates(data_input, params)
    data_input.convert_all_data_to_tensors()
    model = initialize_model(params['model_params'], potential_gates)
    if params['train_params']['fix_gates']:
        model.freeze_gate_params()

    # NOTE: this replaces an earlier variant that started from one gate and
    # added one gate per training round.
    tracker = run_train_model(model, params['train_params'], data_input)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pkl'))
    with open(os.path.join(save_dir, 'tracker.pkl'), 'wb') as f:
        pickle.dump(tracker, f)

    if params['plot_umap_reflection']:
        # Mirror the first coordinate about x = 0.5 (x -> 1 - x); the data
        # is already in the embedded space here. Samples are modified in
        # place.
        reflected_data = []
        for data in data_input.x_tr:
            data[:, 0] = 1 - data[:, 0]
            reflected_data.append(data)
        data_input.x_tr = reflected_data

        gate_tree = model.get_gate_tree()
        reflected_gates = []
        for gate in gate_tree:
            print(gate)
            # gate[0][1] / gate[0][2] are treated as the low / high bound
            # on the reflected axis; mirroring swaps their roles.
            low_reflected = 1 - gate[0][2]
            high_reflected = 1 - gate[0][1]
            gate[0][1] = low_reflected
            gate[0][2] = high_reflected
            print(gate)
            reflected_gates.append(gate)
        model.init_nodes(reflected_gates)
        # NOTE(review): this prints the bound method object, not a result.
        print(model.init_nodes)
        print(model.get_gates())

    results_plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    # Repeat each training sample's label once per row of that sample so the
    # stacked labels line up with the concatenated training data.
    labels = np.array(np.concatenate([
        data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
        for idx, sample in enumerate(data_input.x_tr)
    ]))
    if transform_params['embed_dim'] == 2:
        results_plotter.plot_data_with_gates(labels)
        plt.savefig(os.path.join(save_dir, 'final_gates.png'))
    else:
        # For other embedding dimensions the plotter returns separate
        # figures, which are pickled instead of being saved as an image.
        fig_pos, _, fig_neg, _ = results_plotter.plot_data_with_gates(labels)
        with open(os.path.join(save_dir, 'final_gates_pos_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_pos, f)
        with open(os.path.join(save_dir, 'final_gates_neg_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_neg, f)

    with open(os.path.join(save_dir, 'configs.pkl'), 'wb') as f:
        pickle.dump(params, f)

    print('Learned weights:', model.linear.weight)
    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
def main(params):
    """Learn gates in the untransformed feature space, then plot a projection.

    Stage one forces an identity transform, initializes gates with
    ``init_gates`` and trains the model in rounds, adding one further gate
    tree after each round until the candidates run out or
    ``train_params['num_gates_to_learn']`` rounds have been trained. Stage
    two restores the raw data, fits the transformer configured in
    ``params['transform_params']`` and plots the results. The model weights,
    per-round trackers, figures and parameters are written under
    ``params['save_dir']``.

    Args:
        params: Mapping of run settings. Keys read directly here are
            ``'save_dir'``, ``'data_params'``, ``'random_seed'``,
            ``'transform_params'``, ``'model_params'``, ``'train_params'``
            and ``'plot_umap_reflection'``.

    Returns:
        The named-tuple representation of the tracker from the last
        training round.
    """
    start_time = time.time()
    set_random_seeds(params)

    save_dir = params['save_dir']
    transform_params = params['transform_params']

    def _stacked_labels():
        # Repeat each training sample's label once per row of that sample.
        # Built fresh on every call from the current data_input state.
        return np.array(np.concatenate([
            data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
            for idx, sample in enumerate(data_input.x_tr)
        ]))

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()
    print('%d samples in the training data' % len(data_input.x_tr))

    # Force identity for the first transform.
    data_transformer = DataTransformerFactory(
        {'transform_type': 'identity'},
        params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        data_transformer,
        cells_to_subsample=transform_params['cells_to_subsample'],
        num_cells_for_transformer=transform_params[
            'num_cells_for_transformer'],
        use_labels_to_transform_data=transform_params[
            'use_labels_to_transform_data'],
    )
    # The transformer fitted above is the forced identity one, so it is
    # saved unconditionally. The former guard compared the whole
    # transform_params mapping with 'tsne', which was always unequal, so
    # this is what already happened on every run.
    data_input.save_transformer(save_dir)
    data_input.normalize_data()

    # Gates aren't plotted because we're in n dimensions.
    unused_cluster_gate_inits = init_gates(data_input, params)
    data_input.convert_all_data_to_tensors()
    init_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(
        unused_cluster_gate_inits, data_input, params, model=None)
    model = initialize_model(params['model_params'], [init_gate_tree])

    trackers_per_round = []
    num_gates_left = len(unused_cluster_gate_inits)
    # One round for the initial gate plus one per remaining candidate.
    for i in range(num_gates_left + 1):
        performance_tracker = run_train_model(
            model, params['train_params'], data_input)
        trackers_per_round.append(performance_tracker.get_named_tuple_rep())
        if i == params['train_params']['num_gates_to_learn'] - 1:
            break
        if i != num_gates_left:
            next_gate_tree, unused_cluster_gate_inits = get_next_gate_tree(
                unused_cluster_gate_inits, data_input, params, model=model)
            model.add_node(next_gate_tree)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pkl'))
    with open(os.path.join(save_dir, 'trackers.pkl'), 'wb') as f:
        pickle.dump(trackers_per_round, f)

    if params['plot_umap_reflection']:
        # Mirror the first coordinate about x = 0.5 (x -> 1 - x). Samples
        # are modified in place.
        reflected_data = []
        for data in data_input.x_tr:
            data[:, 0] = 1 - data[:, 0]
            reflected_data.append(data)
        data_input.x_tr = reflected_data

        gate_tree = model.get_gate_tree()
        reflected_gates = []
        for gate in gate_tree:
            print(gate)
            # gate[0][1] / gate[0][2] are treated as the low / high bound
            # on the reflected axis; mirroring swaps their roles.
            low_reflected = 1 - gate[0][2]
            high_reflected = 1 - gate[0][1]
            gate[0][1] = low_reflected
            gate[0][2] = high_reflected
            print(gate)
            reflected_gates.append(gate)
        model.init_nodes(reflected_gates)
        # NOTE(review): this prints the bound method object, not a result.
        print(model.init_nodes)
        print(model.get_gates())

    # Stage two: go back to the raw data and fit the configured projection,
    # keeping the scale/offset that were in effect during gate learning.
    data_transformer = DataTransformerFactory(
        transform_params, params['random_seed']).manufacture_transformer()
    data_input.convert_all_data_to_numpy()
    data_input.x_tr = data_input.x_tr_raw
    data_input.x_te = data_input.x_te_raw
    old_scale = data_input.scale
    old_offset = data_input.offset
    print("fitting projection")
    data_input.embed_data_and_fit_transformer(
        data_transformer,
        cells_to_subsample=transform_params['cells_to_subsample'],
        num_cells_for_transformer=transform_params[
            'num_cells_for_transformer'],
        use_labels_to_transform_data=transform_params[
            'use_labels_to_transform_data'],
    )

    results_plotter = MultidimDataAndGatesPlotter(
        model,
        np.concatenate(data_input.x_tr),
        np.concatenate(data_input.untransformed_matched_x_tr),
        old_scale,
        old_offset,
        data_input.transformer)

    results_plotter.plot_in_feature_space(_stacked_labels())
    plt.savefig(os.path.join(save_dir, 'feature_results.png'))

    if transform_params['embed_dim'] == 2:
        results_plotter.plot_data_with_gates(_stacked_labels())
        plt.savefig(os.path.join(save_dir, 'final_gates.png'))
    else:
        # For other embedding dimensions the plotter returns separate
        # figures, which are pickled instead of being saved as an image.
        fig_pos, _, fig_neg, _ = results_plotter.plot_data_with_gates(
            _stacked_labels())
        with open(os.path.join(save_dir, 'final_gates_pos_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_pos, f)
        with open(os.path.join(save_dir, 'final_gates_neg_3d.pkl'),
                  'wb') as f:
            pickle.dump(fig_neg, f)

    with open(os.path.join(save_dir, 'configs.pkl'), 'wb') as f:
        pickle.dump(params, f)

    print('Complete main loop took %.4f seconds' % (time.time() - start_time))
    return trackers_per_round[-1]
def main(path_to_params):
    """Train two single-gate models in sequence, the second inside the first.

    Stage one embeds and normalizes the data, trains ``model1`` from the
    first gate tree and saves it with its tracker. Stage two keeps only the
    data selected by ``filter_data_inside_first_model_gate(model1)``,
    re-fits the transformer, trains ``model2`` and saves it, its tracker, a
    plot of its final gates and the parameters. Everything is written under
    ``params['save_dir']``.

    Args:
        path_to_params: Path handed to ``TransformParameterParser``.

    Returns:
        None.
    """
    t_begin = time.time()
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)
    set_random_seeds(params)

    out_dir = params['save_dir']
    transform_cfg = params['transform_params']

    def _fit_and_save_transformer():
        # Build a fresh transformer, fit/apply it to the current data and
        # save it to the output directory.
        transformer = DataTransformerFactory(
            transform_cfg, params['random_seed']).manufacture_transformer()
        data_input.embed_data_and_fit_transformer(
            transformer,
            cells_to_subsample=transform_cfg['cells_to_subsample'],
            num_cells_for_transformer=transform_cfg[
                'num_cells_for_transformer'],
            use_labels_to_transform_data=transform_cfg[
                'use_labels_to_transform_data'],
        )
        data_input.save_transformer(out_dir)

    def _train_and_save(gate_tree, model_file, tracker_file):
        # Train a model from one gate tree and save weights and tracker.
        stage_model = initialize_model(params['model_params'], [gate_tree])
        stage_tracker = run_train_model(
            stage_model, params['train_params'], data_input)
        torch.save(stage_model.state_dict(),
                   os.path.join(out_dir, model_file))
        with open(os.path.join(out_dir, tracker_file), 'wb') as handle:
            pickle.dump(stage_tracker, handle)
        return stage_model

    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    with open(os.path.join(out_dir, 'params.pkl'), 'wb') as params_file:
        pickle.dump(params, params_file)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    # ---- Stage one: model on the full data ----
    _fit_and_save_transformer()
    data_input.normalize_data()
    candidate_gates = init_plot_and_save_gates(data_input, params)
    data_input.convert_all_data_to_tensors()
    first_tree, candidate_gates = get_next_gate_tree(
        candidate_gates, data_input, params, model=None)
    model1 = _train_and_save(first_tree, 'model1.pkl', 'tracker1.pkl')

    # ---- Stage two: keep the data inside model1's gate and re-embed ----
    data_input.filter_data_inside_first_model_gate(model1)
    # NOTE(review): the gates are initialized before the data is re-embedded
    # and normalize_data() is not called again in this stage; the second
    # save_transformer() call also targets the same directory as the first.
    # Confirm this ordering is intended.
    candidate_gates = init_plot_and_save_gates(data_input, params)
    _fit_and_save_transformer()
    data_input.convert_all_data_to_tensors()
    second_tree, _ = get_next_gate_tree(
        candidate_gates, data_input, params, model=None)
    model2 = _train_and_save(second_tree, 'model2.pkl', 'tracker2.pkl')

    plotter = DataAndGatesPlotterDepthOne(
        model2, np.concatenate(data_input.x_tr))
    # Repeat each training sample's label once per row of that sample so the
    # stacked labels line up with the concatenated training data.
    labels_per_sample = [
        data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
        for idx, sample in enumerate(data_input.x_tr)
    ]
    plotter.plot_data_with_gates(np.array(np.concatenate(labels_per_sample)))
    plt.savefig(os.path.join(out_dir, 'final_gates.png'))

    with open(os.path.join(out_dir, 'configs.pkl'), 'wb') as configs_file:
        pickle.dump(params, configs_file)

    print('Complete main loop took %.4f seconds' % (time.time() - t_begin))
def main(path_to_params):
    """Train a model whose gates are added one at a time by a heuristic.

    After the usual split / embed / normalize steps, a
    ``MultipleGateInitializerHeuristic`` supplies the first gate. The model
    is then trained ``num_gates`` times; after every round except the last
    the initializer is given the model's current gates and asked for the
    next gate, which is added to the model. Training stops early when the
    initializer returns ``None``. The model weights, the tracker of the last
    training round and a plot of the final gates are written under
    ``params['save_dir']``.

    Args:
        path_to_params: Path handed to ``TransformParameterParser``.

    Returns:
        None.

    Raises:
        ValueError: If ``gate_init_multi_heuristic_params['num_gates']`` is
            smaller than 1, in which case no training round would run.
    """
    start_time = time.time()
    params = TransformParameterParser(path_to_params).parse_params()
    print(params)
    check_consistency_of_params(params)
    set_random_seeds(params)

    save_dir = params['save_dir']
    transform_params = params['transform_params']

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, 'params.pkl'), 'wb') as f:
        pickle.dump(params, f)

    data_input = DataInput(params['data_params'])
    data_input.split_data()

    data_transformer = DataTransformerFactory(
        transform_params, params['random_seed']).manufacture_transformer()
    data_input.embed_data_and_fit_transformer(
        data_transformer,
        transform_params['cells_to_subsample'],
        transform_params['num_cells_for_transformer'],
    )
    data_input.save_transformer(save_dir)
    data_input.normalize_data()

    # Everything below differs from the other main_UMAP script.
    multi_gate_initializer = MultipleGateInitializerHeuristic(
        data_input,
        params['model_params']['node_type'],
        params['gate_init_multi_heuristic_params'])
    init_gate_tree = [multi_gate_initializer.init_next_gate()]
    model = initialize_model(params['model_params'], init_gate_tree)
    data_input.prepare_data_for_training()

    num_gates = params['gate_init_multi_heuristic_params']['num_gates']
    if num_gates < 1:
        # Without a training round there is no tracker to save; previously
        # this surfaced later as a NameError on `performance_tracker`.
        raise ValueError(
            "gate_init_multi_heuristic_params['num_gates'] must be at "
            "least 1, got %r" % (num_gates,))

    for i in range(num_gates):
        performance_tracker = run_train_model(
            model, params['train_params'], data_input)
        # Give the initializer the gates as learned so far.
        multi_gate_initializer.gates = model.get_gates()
        if i != num_gates - 1:
            print(model.get_gates())
            next_gate = multi_gate_initializer.init_next_gate()
            if next_gate is None:
                print(
                    'There are no non-overlapping initializations left to try!'
                )
                break
            model.add_node(next_gate)

    torch.save(model.state_dict(), os.path.join(save_dir, 'model.pkl'))
    # Only the tracker of the last completed training round is saved.
    with open(os.path.join(save_dir, 'tracker.pkl'), 'wb') as f:
        pickle.dump(performance_tracker, f)

    results_plotter = DataAndGatesPlotterDepthOne(
        model, np.concatenate(data_input.x_tr))
    # Repeat each training sample's label once per row of that sample so the
    # stacked labels line up with the concatenated training data.
    labels = np.array(np.concatenate([
        data_input.y_tr[idx] * torch.ones([sample.shape[0], 1])
        for idx, sample in enumerate(data_input.x_tr)
    ]))
    results_plotter.plot_data_with_gates(labels)
    plt.savefig(os.path.join(save_dir, 'final_gates.png'))

    print('Complete main loop took %.4f seconds' % (time.time() - start_time))