# NOTE(review): this first assignment sits at column 0 while its siblings are
# indented by four spaces; it looks like its leading indentation was lost
# when the excerpt was extracted -- confirm against the original file.
# Result accumulators; presumably one entry is appended per seed further
# down (the code that fills them is outside this excerpt).
test_auc_output = []
    val_auc_output = []
    # Starts at 0; presumably accumulates per-seed elapsed time (see
    # init_time below) -- its update is not visible here.
    average_time_seed = 0

    # One independent run per seed in args.seeds.
    for s in args.seeds:
        # Wall-clock timestamp taken at the start of this seed's run.
        init_time = time.time()

        print('New seed: ' + str(s))
        # Print PyTorch's current CUDA memory counters (allocated, then
        # reserved) at the start of every seed.
        print(torch.cuda.memory_allocated())
        print(torch.cuda.memory_reserved())

        # A fresh model is built for every seed from the same inputs.
        dendronet = DendroMatrixLinReg(device, root_weights, parent_path_tensor, edge_tensor_matrix)
        # NOTE(review): these names bind the model's own attributes, not
        # copies; if they are meant as "best so far" snapshots they will
        # follow later in-place updates unless re-assigned -- confirm.
        best_root_weights = dendronet.root_weights
        best_delta_matrix = dendronet.delta_mat

        # Two-stage split: the train/test split always uses seed=0, while the
        # train/validation split uses the current seed s. Assuming
        # split_indices is deterministic for a given seed, the test set is the
        # same for every seed -- TODO confirm.
        train_idx, test_idx = split_indices(mapping, seed=0)
        train_idx, val_idx = split_indices(train_idx, seed=s)

        # creating idx dataset objects for batching
        train_set = IndicesDataset(train_idx)
        val_set = IndicesDataset(val_idx)
        test_set = IndicesDataset(test_idx)

        # Setting some parameters for shuffle batch
        # The same settings (including shuffle=True) are used for all three
        # loaders; num_workers=0 loads batches in the main process.
        params = {'batch_size': BATCH_SIZE,
                  'shuffle': True,
                  'num_workers': 0}

        # The loaders iterate over index datasets, not over the data itself.
        train_batch_gen = torch.utils.data.DataLoader(train_set, **params)
        val_batch_gen = torch.utils.data.DataLoader(val_set, **params)
        test_batch_gen = torch.utils.data.DataLoader(test_set, **params)
示例#2
0
    # One run per entry in seeds; the loop body continues past this excerpt.
    for seed in seeds:
        # Multi_CT_conv = MultiCTConvNet(device=device, num_cell_types=len(cell_names), seq_length=501,
        #                                 kernel_size=26, number_of_kernels=64, polling_window=7)

        # Sequence convolution module: three kernel sizes / kernel counts and
        # two pooling windows are passed, with 4 input channels and a sequence
        # length of 501.
        convolution = SeqConvModule(device=device,
                                    seq_length=501,
                                    kernel_sizes=(16, 3, 3),
                                    num_of_kernels=(128, 64, 32),
                                    polling_windows=(3, 4),
                                    input_channels=4)
        # Fully connected module with layer sizes (encoding length + 32, 32, 1).
        # Presumably the "+ 32" matches the 32 kernels of the last convolution
        # layer, concatenated with one tissue encoding -- TODO confirm.
        fully_connected = FCModule(device=device,
                                   layer_sizes=(len(tissue_encodings[0]) + 32,
                                                32, 1))

        # Two-stage split on the packed samples: the train/test split always
        # uses seed=0, the train/validation split uses the current seed.
        packed_train_idx, packed_test_idx = split_indices(packed_samples,
                                                          seed=0)
        packed_train_idx, packed_val_idx = split_indices(packed_train_idx,
                                                         seed=seed)

        # unpacking: every packed split is an iterable of packets, and every
        # packet is an iterable of samples; flatten each split into a plain
        # per-sample list, preserving packet order and in-packet order.
        train_idx = [sample
                     for packet in packed_train_idx
                     for sample in packet]
        val_idx = [sample
                   for packet in packed_val_idx
                   for sample in packet]
        # test_idx is only initialised here; the code that fills it lies
        # outside this excerpt, so it is left as an empty list.
        test_idx = []
示例#3
0
                num_nodes = len(node_list)
                num_edges = num_nodes - 1

                # Parent-child matrix: entry [p, c] is set to 1.0 when
                # node_list[p] is the parent of node_list[c]; all other
                # entries stay 0.0.
                parent_child_mat = np.zeros(shape=(num_nodes, num_nodes),
                                            dtype=np.float32)

                # Record each node's position once so that every parent lookup
                # below is O(1) instead of an O(n) node_list.index() scan per
                # child. Keys are id(node) so nodes need not be hashable;
                # setdefault keeps the first position, as list.index() would.
                # This assumes nodes compare by identity (no custom __eq__)
                # -- TODO confirm against the node class. A parent missing
                # from node_list now raises KeyError rather than ValueError.
                node_position = {}
                for idx, node in enumerate(node_list):
                    node_position.setdefault(id(node), idx)

                for child_idx in range(1, num_nodes):  # excluding the root
                    parent_idx = node_position[id(node_list[child_idx].parent)]
                    parent_child_mat[parent_idx, child_idx] = 1.0

                pp_mat = build_parent_path_mat(parent_child_mat,
                                               num_edges=num_edges)

                # split the leaf positions into a training and a validation set
                # (no seed is passed, so split_indices uses its default)
                train_idx, valid_idx = split_indices(range(len(leaves)))

                # constructing train and valid x and y matrices
                # Each leaves[i] is indexed as a pair: element [0] is collected
                # as a column index (presumably a column of pp_mat -- TODO
                # confirm) and element [1] is an object exposing .x and .y.
                train_col_idx = [leaves[i][0] for i in train_idx]
                valid_col_idx = [leaves[i][0] for i in valid_idx]
                train_col_idx_tensor = torch.tensor(train_col_idx,
                                                    device=device)
                valid_col_idx_tensor = torch.tensor(valid_col_idx,
                                                    device=device)

                # Features of the training leaves, stacked through NumPy and
                # stored as a float64 tensor on the target device.
                train_x = torch.tensor(np.asarray(
                    [leaves[i][1].x for i in train_idx]),
                                       device=device,
                                       dtype=torch.double)
                # Targets of the training leaves (this statement continues
                # beyond the end of the excerpt).
                train_y = torch.tensor(np.asarray(
                    [leaves[i][1].y for i in train_idx]),
示例#4
0
    # Result accumulator; presumably one validation AUC is appended per seed
    # further down (not visible in this excerpt).
    val_auc_output = []
    average_time_seed = 0  # to test time performance of the training of this model

    # One independent run per seed in args.seeds.
    for s in args.seeds:
        # Wall-clock timestamp taken at the start of this seed's run.
        init_time = time.time()
        # Baseline used for comparison with DendroNet: a plain linear model
        # whose raw outputs are treated as logits, which makes it a logistic
        # model once a sigmoid is applied. The sigmoid is kept out of the
        # model so that BCEWithLogitsLoss, a more stable version of BCELoss,
        # can be used as the loss function.

        # Input size is the number of features in one row of X.
        logistic = LinRegModel(len(X[0]))
        logistic.to(device)
        # Cast the model's parameters to float64.
        logistic = logistic.double()
        # NOTE(review): this binds the model's lin_1 attribute itself, not a
        # copy; if it is meant as a "best so far" snapshot it will follow later
        # in-place updates unless re-assigned -- confirm.
        best_weights = logistic.lin_1

        # Two-stage split over sample positions: the train/test split always
        # uses seed=0, the train/validation split uses the current seed s.
        train_idx, test_idx = split_indices(range(len(X)), seed=0)
        train_idx, val_idx = split_indices(train_idx, seed=s)

        # creating idx dataset objects for batching
        train_set = IndicesDataset(train_idx)
        val_set = IndicesDataset(val_idx)
        test_set = IndicesDataset(test_idx)

        # Setting some parameters for shuffle batch
        # The same settings (including shuffle=True) are used for all three
        # loaders; num_workers=0 loads batches in the main process.
        params = {'batch_size': BATCH_SIZE, 'shuffle': True, 'num_workers': 0}

        # The loaders iterate over index datasets, not over the data itself.
        train_batch_gen = torch.utils.data.DataLoader(train_set, **params)
        val_batch_gen = torch.utils.data.DataLoader(val_set, **params)
        test_batch_gen = torch.utils.data.DataLoader(test_set, **params)

        # converting X and y to tensors, and transferring to GPU if the cuda flag is set
示例#5
0
# each column holds the trainable delta vector of one edge in the graph;
# the matrix has shape (num_features, num_edges) and starts out as all zeros
edge_tensor_matrix = np.zeros(shape=(num_features, num_edges))
"""
Now we have all the components, and can create an instance of the DendroNet model specific to our graph architecture
We will use a linear regressor as the base architecture
"""
# The model receives the device, the root weights, the parent-path tensor and
# the zero-initialised edge matrix built above.
dendronet = DendroMatrixLinReg(device, root_weights, parent_path_tensor,
                               edge_tensor_matrix)
"""
We typically want to split into a train and test/validation set. We will do this by assigning indices to either group so 
that we can keep the mapping between parent_path column ID and the corresponding sample in X and y

We can then use the IndicesDataset class to perform shuffle-batching during training
"""

# Split the sample positions 0..len(y)-1 into a training and a test group.
# No seed is passed, so split_indices uses its default.
train_idx, test_idx = split_indices(range(len(y)))

# creating idx dataset objects for batching
train_set = IndicesDataset(train_idx)
test_set = IndicesDataset(test_idx)

# Setting some parameters for shuffle batch
# The same settings (including shuffle=True) are used for both loaders;
# num_workers=0 loads batches in the main process.
params = {'batch_size': BATCH_SIZE, 'shuffle': True, 'num_workers': 0}

# The loaders iterate over index datasets, not over the data itself.
train_batch_gen = torch.utils.data.DataLoader(train_set, **params)
test_batch_gen = torch.utils.data.DataLoader(test_set, **params)

# converting X and y to tensors, and transferring to GPU if the cuda flag is set
# X and y are rebound to float64 tensors on `device`; the index lists created
# above were derived from positions (range(len(y))), not from the data.
X = torch.tensor(X, dtype=torch.double, device=device)
y = torch.tensor(y, dtype=torch.double, device=device)