# Containers and accumulator declared up front, before iterating over the seeds.
test_auc_output = []
val_auc_output = []
average_time_seed = 0

for s in args.seeds:
    init_time = time.time()
    print('New seed: ' + str(s))
    # Print the CUDA memory figures reported by torch before building this seed's model.
    print(torch.cuda.memory_allocated())
    print(torch.cuda.memory_reserved())

    # A new model is built for every seed; the "best" references initially point
    # at the attributes of that freshly constructed model.
    dendronet = DendroMatrixLinReg(
        device,
        root_weights,
        parent_path_tensor,
        edge_tensor_matrix,
    )
    best_root_weights = dendronet.root_weights
    best_delta_matrix = dendronet.delta_mat

    # The train/test split always uses seed 0, so only the train/validation
    # split depends on the current seed.
    train_idx, test_idx = split_indices(mapping, seed=0)
    train_idx, val_idx = split_indices(train_idx, seed=s)

    # Wrap each index collection in a dataset object so it can be batched.
    train_set, val_set, test_set = (
        IndicesDataset(idx) for idx in (train_idx, val_idx, test_idx)
    )

    # Shared loader settings for shuffle-batching (applied to all three loaders).
    params = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    train_batch_gen, val_batch_gen, test_batch_gen = (
        torch.utils.data.DataLoader(dataset, **params)
        for dataset in (train_set, val_set, test_set)
    )
for seed in seeds:
    # Multi_CT_conv = MultiCTConvNet(device=device, num_cell_types=len(cell_names), seq_length=501,
    #                                kernel_size=26, number_of_kernels=64, polling_window=7)

    # Sequence convolution module and fully connected module are rebuilt for every seed.
    convolution = SeqConvModule(
        device=device,
        seq_length=501,
        kernel_sizes=(16, 3, 3),
        num_of_kernels=(128, 64, 32),
        polling_windows=(3, 4),
        input_channels=4,
    )
    # First layer width is the length of one tissue encoding plus 32
    # (presumably the 32 matches the last entry of num_of_kernels -- confirm).
    fully_connected = FCModule(
        device=device,
        layer_sizes=(len(tissue_encodings[0]) + 32, 32, 1),
    )

    # The train/test split of the packed samples always uses seed 0; only the
    # train/validation split depends on the current seed.
    packed_train_idx, packed_test_idx = split_indices(packed_samples, seed=0)
    packed_train_idx, packed_val_idx = split_indices(packed_train_idx, seed=seed)

    # unpacking: flatten each collection of packets into a flat list of samples
    train_idx = [sample for packet in packed_train_idx for sample in packet]
    val_idx = [sample for packet in packed_val_idx for sample in packet]
    test_idx = []
num_nodes = len(node_list)
num_edges = num_nodes - 1

# Constructing the parent-child matrix: entry [parent, child] is set to 1.0 for
# every node except the one at index 0. Would be nice to find a faster way to do
# this (each node_list.index call below is a linear scan of node_list).
parent_child_mat = np.zeros(shape=(num_nodes, num_nodes), dtype=np.float32)
for child_idx in range(1, len(node_list)):  # excluding the root
    parent_idx = node_list.index(node_list[child_idx].parent)
    parent_child_mat[parent_idx, child_idx] = 1.0
pp_mat = build_parent_path_mat(parent_child_mat, num_edges=num_edges)

# Split the positions in `leaves` into a training and a validation index set.
train_idx, valid_idx = split_indices(range(len(leaves)))

# Constructing train and valid x and y matrices.
# Presumably each entry of `leaves` is a (column index, node) pair -- confirm.
train_col_idx = [leaves[i][0] for i in train_idx]
valid_col_idx = [leaves[i][0] for i in valid_idx]
train_col_idx_tensor = torch.tensor(train_col_idx, device=device)
valid_col_idx_tensor = torch.tensor(valid_col_idx, device=device)
# Features and targets are read from the second element of each leaf entry.
train_x = torch.tensor(np.asarray(
    [leaves[i][1].x for i in train_idx]), device=device, dtype=torch.double)
train_y = torch.tensor(np.asarray(
    [leaves[i][1].y for i in train_idx]),
val_auc_output = []
average_time_seed = 0  # to test time performance of the training of this model

for s in args.seeds:
    init_time = time.time()

    # Baseline for comparison with DendroNet performance: a plain linear model whose
    # output is passed through a sigmoid, which makes it a logistic model.
    # It is kept as a linear regression so that BCEWithLogitsLoss, a more stable
    # version of BCELoss, can be used as the loss function.
    logistic = LinRegModel(len(X[0]))
    logistic.to(device)
    logistic = logistic.double()
    best_weights = logistic.lin_1

    # The train/test split always uses seed 0, so only the train/validation
    # split depends on the current seed.
    train_idx, test_idx = split_indices(range(len(X)), seed=0)
    train_idx, val_idx = split_indices(train_idx, seed=s)

    # Wrap each index collection in a dataset object so it can be batched.
    train_set, val_set, test_set = (
        IndicesDataset(idx) for idx in (train_idx, val_idx, test_idx)
    )

    # Shared loader settings for shuffle-batching (applied to all three loaders).
    params = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
    train_batch_gen, val_batch_gen, test_batch_gen = (
        torch.utils.data.DataLoader(dataset, **params)
        for dataset in (train_set, val_set, test_set)
    )

    # converting X and y to tensors, and transferring to GPU if the cuda flag is set
# Each column holds a trainable delta vector for one edge in the graph.
edge_tensor_matrix = np.zeros(shape=(num_features, num_edges))

# Now we have all the components, and can create an instance of the DendroNet
# model specific to our graph architecture.
# We will use a linear regressor as the base architecture.
dendronet = DendroMatrixLinReg(
    device,
    root_weights,
    parent_path_tensor,
    edge_tensor_matrix,
)

# We typically want to split into a train and test/validation set.
# We do this by assigning indices to either group, so that we keep the mapping
# between the parent_path column ID and the corresponding sample in X and y.
# The IndicesDataset class can then be used to perform shuffle-batching during training.
train_idx, test_idx = split_indices(range(len(y)))

# Wrap each index collection in a dataset object so it can be batched.
train_set, test_set = (IndicesDataset(idx) for idx in (train_idx, test_idx))

# Shared loader settings for shuffle-batching (applied to both loaders).
params = dict(batch_size=BATCH_SIZE, shuffle=True, num_workers=0)
train_batch_gen, test_batch_gen = (
    torch.utils.data.DataLoader(dataset, **params)
    for dataset in (train_set, test_set)
)

# Converting X and y to double-precision tensors placed on `device`
# (the GPU if the cuda flag is set).
X = torch.tensor(X, dtype=torch.double, device=device)
y = torch.tensor(y, dtype=torch.double, device=device)