Example #1
def find_shower_gnn(dbscan, groups, em_primaries, energy_data, types, model_name, model_checkpoint, gpu_ind=0, verbose=False):
    """
    NOTE: THIS IS PROBABLY BROKEN; it was written right after the first pi0 workshop
    
    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    groups: data parsed from "group_label": ["parse_cluster3d_clean", "cluster3d_mcst", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]
    
    returns a list of length len(em_primaries) containing np arrays, each of which contains the indices corresponding to the voxels in the cone of the corresponding EM primary
    """
    event_data = [torch.tensor(dbscan), torch.tensor(em_primaries)]
    torch.cuda.set_device(gpu_ind)
    model_attn = DataParallel(BasicAttentionModel(model_name),
                              device_ids=[gpu_ind],
                              dense=False)
    
    model_attn.load_state_dict(torch.load(model_checkpoint, map_location='cuda:'+str(gpu_ind))['state_dict'])
    model_attn.eval().cuda()
    
    
    data_grp = process_group_data(torch.tensor(groups), torch.tensor(dbscan))
    
    clusts = form_clusters_new(dbscan)
    selection = filter_compton(clusts) # non-compton looking clusters
    clusts = clusts[selection]
    full_primaries = np.array(assign_primaries3(em_primaries, clusts, groups))
    primaries = assign_primaries(torch.tensor(em_primaries), clusts, torch.tensor(groups))
    batch = get_cluster_batch(dbscan, clusts)
    edge_index = primary_bipartite_incidence(batch, primaries, cuda=True)
    
    if len(edge_index) == 0: # no secondary clusters
        selected_voxels = []
        for p in full_primaries.astype(int):
            if p == -1:
                selected_voxels.append(np.array([]))
            else:
                selected_voxels.append(clusts[p])
        return selected_voxels
    
    n = len(clusts)
    mask = np.array([(i not in primaries) for i in range(n)])
    others = np.arange(n)[mask]
    
    pred_labels = model_attn(event_data)
    pred_nodes = assign_clusters(edge_index, pred_labels, primaries, others, n)

    count = 0
    selected_voxels = []
    for i in range(len(full_primaries)):
        p = full_primaries[i]
        if p == -1:
            selected_voxels.append(np.array([]))
        else:
            selected_clusts = clusts[np.where(pred_nodes == p)[0]]
            selected_voxels.append(np.concatenate(selected_clusts))
            
    return selected_voxels
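A minimal usage sketch of the returned structure: assuming, as in the cone-based examples below, that the last column of energy_data is the voxel energy, the per-primary voxel index lists can be reduced to per-shower energies. The arrays here are hypothetical stand-ins for the real parser outputs.

import numpy as np

# hypothetical stand-ins: 10 voxels, three EM primaries, one of them unmatched
energy_data = np.random.rand(10, 5)                       # (x, y, z, batch_id, energy)
selected_voxels = [np.array([0, 1, 2]), np.array([]), np.array([5, 6, 7])]

shower_energies = [energy_data[idx.astype(int), -1].sum() if len(idx) else 0.0
                   for idx in selected_voxels]
print(shower_energies)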
Example #2
    def forward(self, data):
        """
        input data:
            data[0] - dbscan data
            data[1] - primary data
        """
        # need to form graph, then pass through GNN
        clusts = form_clusters_new(data[0])

        # remove track-like particles
        #types = get_cluster_label(data[0], clusts)
        #selection = types > 1 # 0 or 1 are track-like
        #clusts = clusts[selection]

        # remove compton clusters
        # if no cluster fits this condition, return
        selection = filter_compton(clusts)  # non-compton looking clusters
        if not len(selection):
            e = torch.tensor([], requires_grad=True)
            if data[0].is_cuda:
                e = e.cuda()
            return e

        clusts = clusts[selection]

        # process group data
        # data_grp = process_group_data(data[1], data[0])
        # data_grp = data[1]

        # form primary/secondary bipartite graph
        primaries = assign_primaries(data[1], clusts, data[0])
        batch = get_cluster_batch(data[0], clusts)
        edge_index = primary_bipartite_incidence(batch, primaries, cuda=True)

        # obtain vertex features
        x = cluster_vtx_features(data[0], clusts, cuda=True)
        # x = cluster_vtx_features_old(data[0], clusts, cuda=True)
        #print("max input: ", torch.max(x.view(-1)))
        #print("min input: ", torch.min(x.view(-1)))
        # obtain edge features
        e = cluster_edge_features(data[0], clusts, edge_index, cuda=True)

        # go through layers
        x = self.attn1(x, edge_index)
        #print("max x: ", torch.max(x.view(-1)))
        #print("min x: ", torch.min(x.view(-1)))
        x = self.attn2(x, edge_index)
        #print("max x: ", torch.max(x.view(-1)))
        #print("min x: ", torch.min(x.view(-1)))
        x = self.attn3(x, edge_index)
        #print("max x: ", torch.max(x.view(-1)))
        #print("min x: ", torch.min(x.view(-1)))

        xbatch = torch.tensor(batch).cuda()
        x, e, u = self.edge_predictor(x, edge_index, e, u=None, batch=xbatch)
        print("max edge weight: ", torch.max(e.view(-1)))
        print("min edge weight: ", torch.min(e.view(-1)))
        return e
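primary_bipartite_incidence is project code; as a rough, hypothetical stand-in, the bipartite graph it builds connects each primary cluster to every non-primary cluster in the same batch entry, returned in the (2, E) edge_index convention used above.

import numpy as np
import torch

def toy_bipartite_edges(batch, primaries):
    # connect every primary cluster to every non-primary cluster sharing its batch id
    prim = set(int(p) for p in primaries)
    edges = [[i, j] for i in prim for j in range(len(batch))
             if j not in prim and batch[i] == batch[j]]
    if not edges:
        return torch.empty((2, 0), dtype=torch.long)
    return torch.tensor(edges, dtype=torch.long).t().contiguous()

batch = np.array([0, 0, 0, 1, 1])        # batch id of each cluster
print(toy_bipartite_edges(batch, primaries=[0, 3]))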
Example #3
    def forward(self, data):
        """
        Input:
            data[0]: (Nx5) Cluster tensor with row (x, y, z, batch_id, cluster_id)
        Output:
        dictionary, with
            'node_pred': torch.tensor with node prediction weights
        """
        # Get device
        cluster_label = data[0]
        device = cluster_label.device

        # Find index of points that belong to the same EM clusters
        clusts = form_clusters_new(cluster_label)

        # If requested, remove clusters below a certain size threshold
        if self.remove_compton:
            selection = np.where(filter_compton(clusts,
                                                self.compton_thresh))[0]
            if not len(selection):
                return self.default_return(device)
            clusts = clusts[selection]

        # Get the cluster ids of each processed cluster
        clust_ids = get_cluster_label(cluster_label, clusts)

        # Get the batch ids of each cluster
        batch_ids = get_cluster_batch(cluster_label, clusts)

        # Form a complete graph (should add options for other structures, TODO)
        edge_index = complete_graph(batch_ids, device=device)
        if not edge_index.shape[0]:
            return self.default_return(device)

        # Obtain vertex features
        x = cluster_vtx_features(cluster_label, clusts, device=device)

        # Obtain edge features
        e = cluster_edge_features(cluster_label,
                                  clusts,
                                  edge_index,
                                  device=device)

        # Convert the batch IDs to a torch tensor to pass to the model
        xbatch = torch.tensor(batch_ids).to(device)

        # Pass through the model, get output
        out = self.node_predictor(x, edge_index, e, xbatch)

        return {
            **out, 'clust_ids': [torch.tensor(clust_ids)],
            'batch_ids': [torch.tensor(batch_ids)],
            'edge_index': [edge_index]
        }
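complete_graph is likewise project code; a toy sketch of the structure it produces is one edge per unordered pair of clusters that share a batch id, again as a (2, E) tensor.

import numpy as np
import torch

def toy_complete_graph(batch_ids):
    # one edge per unordered pair of clusters within the same batch entry
    edges = [[i, j] for i in range(len(batch_ids))
             for j in range(i + 1, len(batch_ids))
             if batch_ids[i] == batch_ids[j]]
    if not edges:
        return torch.empty((2, 0), dtype=torch.long)
    return torch.tensor(edges, dtype=torch.long).t().contiguous()

print(toy_complete_graph(np.array([0, 0, 1, 1, 1])))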
Example #4
def get_lifetimes(data):
    """
    data: np array of DBSCAN-parsed data with shape (N, 5)
    returns: np array of shape (N,) with the label corresponding to the lifetime of the voxel
        lifetime will be infinity if a voxel is outside a cluster or in a compton scatter
    """
    all_lifetimes = np.inf * np.ones(len(data))
    clusts = form_clusters_new(data)

    # remove compton clusters
    selection = filter_compton(clusts)
    clusts = clusts[selection]

    non_compton = np.concatenate(clusts)
    cluster_features = get_cluster_features(data, clusts)
    for ci in range(len(clusts)):
        clust = clusts[ci]
        mean = cluster_features[:, :3][ci]
        direction = cluster_features[:, -3:][ci]

        coords = data[clust][:, :3]
        f = np.dot(coords - mean, direction)
        box_dim = 1
        edges = []
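        # NOTE: the searchsorted-based window below assumes region[:, 2] is sorted in
        # ascending z; if the voxels of a cluster are not z-ordered, neighbours can be missed.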
        for i in range(len(coords)):
            point = coords[i][:3]
            x, y, z = point
            region = coords
            indices = np.arange(len(coords))
            indices = indices[np.searchsorted(region[:, 2], z - box_dim):]
            region = coords[indices]
            indices = indices[:np.searchsorted(
                region[:, 2], z + box_dim, side='right')]
            region = coords[indices]
            indices = indices[np.where((region[:, 1] >= y - box_dim)
                                       & (region[:, 1] <= y + box_dim)
                                       & (region[:, 0] >= x - box_dim)
                                       & (region[:, 0] <= x + box_dim))]
            region = coords[indices]
            for j in indices:
                if i != j:
                    entry = sorted((i, j))
                    if entry not in edges:
                        edges.append(entry)
        edges = np.array(edges)

        births, deaths, edge_list = merge_diagram(f, edges)
        lifetimes = deaths - births
        print(lifetimes)
        all_lifetimes[clust] = lifetimes
        print(all_lifetimes[clust])
    return all_lifetimes
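For reference, the per-point window search above can be written in a vectorized form with no ordering assumption; a sketch (O(N^2) memory) that connects every pair of voxels whose coordinates differ by at most box_dim along each axis:

import numpy as np

def box_edges(coords, box_dim=1.0):
    # pairs (i, j) with i < j and |dx|, |dy|, |dz| all <= box_dim
    diff = np.abs(coords[:, None, :] - coords[None, :, :])   # (N, N, 3)
    close = np.all(diff <= box_dim, axis=-1)
    i, j = np.where(np.triu(close, k=1))
    return np.stack([i, j], axis=1)

coords = np.random.rand(50, 3) * 5
print(box_edges(coords).shape)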
Example #5
    def forward(self, data):
        """
        input data:
            data[0] - dbscan data
        output:
        dictionary, with
            'edge_pred': torch.tensor with edge prediction weights
        """
        # get device
        device = data[0].device

        # need to form graph, then pass through GNN
        clusts = form_clusters_new(data[0])

        # remove compton clusters
        # if no cluster fits this condition, return
        if self.remove_compton:
            selection = filter_compton(
                clusts, self.compton_thresh)  # non-compton looking clusters
            if not len(selection):
                e = torch.tensor([], requires_grad=True)
                e = e.to(device)
                return {'edge_pred': [e]}

            clusts = clusts[selection]

        # form graph
        batch = get_cluster_batch(data[0], clusts)
        edge_index = complete_graph(batch, device=device)

        if not edge_index.shape[0]:
            e = torch.tensor([], requires_grad=True)
            e = e.to(device)
            return {'edge_pred': [e]}

        # obtain vertex directions
        x = cluster_vtx_dirs(data[0], clusts, device=device)

        # obtain edge directions
        e = cluster_edge_dirs(data[0], clusts, edge_index, device=device)

        # get x batch
        xbatch = torch.tensor(batch).to(device)

        # get output
        outdict = self.edge_predictor(x, edge_index, e, xbatch)

        return outdict
    def forward(self, data):
        """
        input data:
            data[0] - dbscan data
        """

        # need to form graph, then pass through GNN
        clusts = form_clusters_new(data[0])

        # remove compton clusters (should we?)
        # if no cluster fits this condition, return
        selection = filter_compton(clusts)  # non-compton looking clusters
        if not len(selection):
            x = torch.tensor([], requires_grad=True)
            if data[0].is_cuda:
                x = x.cuda()
            return x

        clusts = clusts[selection]

        # form complete graph
        batch = get_cluster_batch(data[0], clusts)
        edge_index = complete_graph(batch, cuda=True)
        if not len(edge_index):
            x = torch.tensor([], requires_grad=True)
            if data[0].is_cuda:
                x = x.cuda()
            return x

        batch = torch.tensor(batch)
        if data[0].is_cuda:
            batch = batch.cuda()

        # obtain vertex features
        #x = cluster_vtx_features(data[0], clusts, cuda=True)
        x = cluster_vtx_features_old(data[0], clusts, cuda=True)

        # go through layers
        x = self.econv1(x, edge_index)
        x = self.econv2(x, edge_index)
        x = self.econv3(x, edge_index)

        x, e, u = self.predictor(x,
                                 edge_index,
                                 edge_attr=None,
                                 u=None,
                                 batch=batch)
        return F.log_softmax(x, dim=1)
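The model above returns F.log_softmax scores per cluster; a small self-contained reminder of how those scores pair with F.nll_loss and how the predicted class is read off (relevant to the accuracy computation in the loss that follows):

import torch
import torch.nn.functional as F

# hypothetical log-softmax scores for 3 clusters and 2 classes (secondary, primary)
node_pred = F.log_softmax(torch.tensor([[ 2.0, -1.0],
                                        [-0.5,  1.5],
                                        [ 0.2,  0.1]]), dim=1)
node_assn = torch.tensor([0, 1, 0])

loss = F.nll_loss(node_pred, node_assn)     # expects log-probabilities and integer labels
preds = torch.argmax(node_pred, dim=1)      # most likely class per cluster
print(loss.item(), preds)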
    def forward(self, node_pred, data0, data1):
        """
        node_pred:
            predicted node type from model forward
        data:
            data[0] - 5 types data
            data[1] - primary data
        """
        data0 = data0[0]
        data1 = data1[0]
        # first decide what true edges should be
        # need to form graph, then pass through GNN
        # clusts = form_clusters(data0)
        clusts = form_clusters_new(data0)

        # remove track-like particles
        # types = get_cluster_label(data0, clusts)
        # selection = types > 1 # 0 or 1 are track-like
        # clusts = clusts[selection]

        # remove compton clusters
        # if no cluster fits this condition, return
        selection = filter_compton(clusts)  # non-compton looking clusters
        if not len(selection):
            total_loss = self.lossfn(node_pred, node_pred)
            return {'accuracy': 1., 'loss_seg': total_loss}

        clusts = clusts[selection]

        # get the true node labels
        primaries = assign_primaries(data1, clusts, data0)
        #node_assn = torch.tensor([2*float(i in primaries)-1. for i in range(len(clusts))]) # must return -1 or 1
        node_assn = torch.tensor([
            int(i in primaries) for i in range(len(clusts))
        ])  # must return 0 or 1
        if node_pred.is_cuda:
            node_assn = node_assn.cuda()

        node_assn = node_assn.view(-1)
        #node_pred = node_pred.view(-1)

        weights = torch.tensor([1., 1.])
        if node_pred.is_cuda:
            weights = weights.cuda()

        if self.balance:
            ind0 = node_assn == 0
            ind1 = node_assn == 1
            # number in each class
            n0 = torch.sum(ind0).float()
            n1 = torch.sum(ind1).float()
            weights[0] = n1 / (n0 + n1)
            weights[1] = n0 / (n0 + n1)
            print('class sizes', n0, n1)

        #total_loss = self.lossfn(node_pred, node_assn)
        print('weights', weights)
        total_loss = F.nll_loss(node_pred, node_assn, weight=weights)
        print(total_loss)

        # compute accuracy of assignment
        preds = torch.argmax(node_pred, dim=1)  # predicted class = most likely log-softmax score
        print(node_pred)
        print(preds)
        tot_vox = np.sum([len(c) for c in clusts])
        int_vox = np.sum([
            len(clusts[i]) for i in range(len(clusts))
            if node_assn[i] == preds[i]
        ])
        total_acc = int_vox * 1.0 / tot_vox
        #total_acc = torch.tensor(primary_assign_vox_efficiency(node_assn, node_pred, clusts))

        return {'accuracy': total_acc, 'loss_seg': total_loss}
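The balancing weights above follow a simple inverse-frequency rule; worked out on a toy label vector:

import torch

node_assn = torch.tensor([0, 0, 0, 0, 1, 1])       # class 1 is rarer
n0 = (node_assn == 0).sum().float()
n1 = (node_assn == 1).sum().float()
weights = torch.stack([n1, n0]) / (n0 + n1)        # rarer class gets the larger weight
print(weights)                                     # tensor([0.3333, 0.6667])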
Example #8
    def forward(self, data):
        """
        input data:
            data[0] - dbscan data
            data[1] - primary data
        output data:
            dictionary with following keys:
                edges     : list of edge_index tensors used for edge prediction
                edge_pred : list of torch tensors with edge prediction weights
                matched   : numpy array of group for each cluster (identified by primary index)
                counter   : number of iterations taken
            each list is of length k, where k is the number of times the iterative network is applied
        """
        # need to form graph, then pass through GNN
        clusts = form_clusters_new(data[0])

        # remove compton clusters
        # if no cluster fits this condition, return
        if self.remove_compton:
            selection = filter_compton(
                clusts, self.compton_thresh)  # non-compton looking clusters
            if not len(selection):
                e = torch.tensor([], requires_grad=True)
                if data[0].is_cuda:
                    e = e.cuda()
                return e

            clusts = clusts[selection]

        #others = np.array([(i not in primaries) for i in range(n)])
        batch = get_cluster_batch(data[0], clusts)
        # get x batch
        xbatch = torch.tensor(batch).cuda()

        primaries = assign_primaries(data[1],
                                     clusts,
                                     data[0],
                                     max_dist=self.pmd)
        # keep track of who is matched. -1 is not matched
        matched = np.repeat(-1, len(clusts))
        matched[primaries] = primaries
        # print(matched)

        edges = []
        edge_pred = []

        counter = 0
        found_match = True

        while (-1 in matched) and (counter < self.maxiter) and found_match:
            # continue until either:
            # 1. everything is matched
            # 2. we have exceeded the max number of iterations
            # 3. we didn't find any matches

            #print('iter ', counter)
            counter = counter + 1

            # get matched indices
            assigned = np.where(matched > -1)[0]
            # print(assigned)
            others = np.where(matched == -1)[0]

            edge_index = primary_bipartite_incidence(batch,
                                                     assigned,
                                                     cuda=True)
            # check if there are any edges to predict
            # also batch norm will fail on only 1 edge, so break if this is the case
            if edge_index.shape[1] < 2:
                counter -= 1
                break

            # obtain vertex features
            x = cluster_vtx_features(data[0], clusts, cuda=True)
            # obtain edge features
            e = cluster_edge_features(data[0], clusts, edge_index, cuda=True)
            # print(x.shape)
            # print(torch.max(edge_index))
            # print(torch.min(edge_index))

            out = self.edge_predictor(x, edge_index, e, xbatch)

            # predictions for this edge set.
            edge_pred.append(out[0][0])
            edges.append(edge_index)

            #print(out[0][0].shape)

            matched, found_match = self.assign_clusters(
                edge_index, out[0][0][:, 1] - out[0][0][:, 0], others, matched,
                self.thresh)

            # print(edges)
            # print(edge_pred)

        #print('num iterations: ', counter)

        matched = torch.tensor(matched)
        counter = torch.tensor([counter])
        if data[0].is_cuda:
            matched = matched.cuda()
            counter = counter.cuda()

        return {
            'edges': [edges],
            'edge_pred': [edge_pred],
            'matched': [matched],
            'counter': [counter]
        }
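The quantity out[0][0][:, 1] - out[0][0][:, 0] passed to assign_clusters above is a per-edge logit margin; thresholding it at 0 is equivalent to requiring the softmax 'on' probability to exceed 0.5. A small illustration with hypothetical scores:

import torch
import torch.nn.functional as F

# hypothetical 2-channel scores (off, on) for four edges
edge_pred = torch.tensor([[ 1.2, -0.3],
                          [-0.5,  2.0],
                          [ 0.1,  0.1],
                          [-2.0,  1.5]])

on_prob = F.softmax(edge_pred, dim=1)[:, 1]
margin = edge_pred[:, 1] - edge_pred[:, 0]
print(on_prob, margin > 0, (on_prob > 0.5) == (margin > 0))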
Example #9
    def forward(self, out, clusters, groups, primary):
        """
        out:
            array output from the DataParallel gather function
            out[0] - n_gpus tensors of edge indexes
            out[1] - n_gpus tensors of predicted edge weights from model forward
            out[2] - n_gpus arrays of group ids for each cluster
            out[3] - n_gpus number of iterations
        data:
            cluster_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
            group_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id) 
            em_primaries - n_gpus tensor of (x, y, z) coordinates of origins of EM primaries
        """
        total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
        ngpus = len(clusters)
        for i in range(ngpus):
            data0 = clusters[i]
            data1 = groups[i]
            data2 = primary[i]

            clusts = form_clusters_new(data0)

            # remove compton clusters
            # if no cluster fits this condition, return
            if self.remove_compton:
                selection = filter_compton(
                    clusts)  # non-compton looking clusters
                if not len(selection):
                    edge_pred = out[1][i]
                    total_loss += self.lossfn(edge_pred, edge_pred)
                    total_acc += 1.
                    continue

                clusts = clusts[selection]

            # process group data
            data_grp = data1

            # form primary/secondary bipartite graph
            primaries = assign_primaries(data2, clusts, data0)
            batch = get_cluster_batch(data0, clusts)
            # edge_index = primary_bipartite_incidence(batch, primaries)
            group = get_cluster_label(data_grp, clusts)

            primaries_true = assign_primaries(data2,
                                              clusts,
                                              data1,
                                              use_labels=True)
            primary_fdr, primary_tdr, primary_acc = analyze_primaries(
                primaries, primaries_true)
            total_primary_fdr += primary_fdr
            total_primary_acc += primary_acc

            niter = out[3][i][0]  # number of iterations
            total_iter += niter
            for j in range(niter):
                # determine true assignments
                edge_index = out[0][i][j]
                edge_assn = edge_assignment(edge_index,
                                            batch,
                                            group,
                                            cuda=True)

                edge_pred = out[1][i][j]
                # print(edge_pred)

                # print(edge_assn.shape)
                # print(edge_pred.shape)
                edge_assn = edge_assn.view(-1)
                edge_pred = edge_pred.view(-1)
                # print(edge_assn.shape)
                # print(edge_pred.shape)

                if self.balance:
                    edge_assn, edge_pred = self.balance_classes(
                        edge_assn, edge_pred)

                total_loss += self.lossfn(edge_pred, edge_assn)

            # compute accuracy of assignment
            # need to multiply by batch size to be accurate
            #total_acc = (np.max(batch) + 1) * torch.tensor(secondary_matching_vox_efficiency(edge_index, edge_assn, edge_pred, primaries, clusts, len(clusts)))
            # use out['matched']
            total_acc += torch.tensor(
                secondary_matching_vox_efficiency2(out[2][i], group, primaries,
                                                   clusts))

        return {
            'primary_fdr': total_primary_fdr / ngpus,
            'primary_acc': total_primary_acc / ngpus,
            'accuracy': total_acc / ngpus,
            'loss': total_loss / ngpus,
            'n_iter': total_iter
        }
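edge_assignment above is project code that derives the target labels from the group ids; the core idea, sketched here without the batch bookkeeping the real helper also performs, is that an edge is labelled 1 exactly when both endpoint clusters carry the same group id:

import numpy as np
import torch

def toy_edge_assignment(edge_index, group):
    # label 1 when both clusters of an edge share a group id, else 0
    src, dst = edge_index
    return torch.tensor((group[src] == group[dst]).astype(np.int64))

edge_index = np.array([[0, 0, 1],
                       [1, 2, 2]])       # (2, E)
group = np.array([5, 5, 7])              # group id per cluster
print(toy_edge_assignment(edge_index, group))    # tensor([1, 0, 0])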
Example #10
    def forward(self, out, clusters, groups, primary):
        """
        out:
            array output from the DataParallel gather function
            out[0] - n_gpus tensors of edge indexes
            out[1] - n_gpus tensors of predicted edge weights from model forward
            out[2] - n_gpus arrays of group ids for each cluster
            out[3] - n_gpus number of iterations
        data:
            cluster_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
            group_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id) 
            em_primaries - n_gpus tensor of (x, y, z) coordinates of origins of EM primaries
        """
        total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
        total_ari, total_ami, total_sbd, total_pur, total_eff = 0., 0., 0., 0., 0.
        ngpus = len(clusters)
        for i in range(ngpus):
            data0 = clusters[i]
            data1 = groups[i]
            data2 = primary[i]

            clusts = form_clusters_new(data0)

            # remove compton clusters
            # if no cluster fits this condition, return
            if self.remove_compton:
                selection = filter_compton(
                    clusts)  # non-compton looking clusters
                if not len(selection):
                    edge_pred = out[1][i][0]
                    total_loss += self.lossfn(edge_pred, edge_pred)
                    total_acc += 1.
                    continue

                clusts = clusts[selection]

            # process group data
            data_grp = data1

            # form primary/secondary bipartite graph
            primaries = assign_primaries(data2, clusts, data0)
            batch = get_cluster_batch(data0, clusts)
            # edge_index = primary_bipartite_incidence(batch, primaries)
            group = get_cluster_label(data_grp, clusts)

            primaries_true = assign_primaries(data2,
                                              clusts,
                                              data1,
                                              use_labels=True)
            primary_fdr, primary_tdr, primary_acc = analyze_primaries(
                primaries, primaries_true)
            total_primary_fdr += primary_fdr
            total_primary_acc += primary_acc

            niter = out[3][i][0]  # number of iterations
            total_iter += niter

            # loop over iterations and add loss at each iter.
            for j in range(niter):
                # determine true assignments
                edge_index = out[0][i][j]
                edge_assn = edge_assignment(edge_index,
                                            batch,
                                            group,
                                            cuda=True,
                                            dtype=torch.long)

                # get edge predictions (2 channels)
                edge_pred = out[1][i][j]

                edge_assn = edge_assn.view(-1)

                total_loss += self.lossfn(edge_pred, edge_assn)

            # compute accuracy of assignment
            total_acc += secondary_matching_vox_efficiency2(
                out[2][i], group, primaries, clusts)

            # get clustering metrics
            #print(out[2][i].shape)
            ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(
                out[2][i].cpu().numpy(), clusts, group)
            total_ari += ari
            total_ami += ami
            total_sbd += sbd
            total_pur += pur
            total_eff += eff

        return {
            'primary_fdr': total_primary_fdr / ngpus,
            'primary_acc': total_primary_acc / ngpus,
            'ARI': total_ari / ngpus,
            'AMI': total_ami / ngpus,
            'SBD': total_sbd / ngpus,
            'purity': total_pur / ngpus,
            'efficiency': total_eff / ngpus,
            'accuracy': total_acc / ngpus,
            'loss': total_loss / ngpus,
            'n_iter': total_iter
        }
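DBSCAN_cluster_metrics2 is project code; as an independent reference point, ARI and AMI on label vectors can be computed with scikit-learn:

import numpy as np
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score

pred = np.array([0, 0, 1, 1, 1, 2])    # predicted group per cluster
true = np.array([0, 0, 1, 1, 2, 2])    # true group per cluster
print(adjusted_rand_score(true, pred), adjusted_mutual_info_score(true, pred))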
Example #11
def find_shower_cone(dbscan,
                     em_primaries,
                     energy_data,
                     types,
                     length_factor=14.107334041,
                     slope_percentile=52.94032412,
                     slope_factor=5.86322059):
    """
    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]
    
    returns a list of length len(em_primaries) containing np arrays, each of which contains the indices corresponding to the voxels in the cone of the corresponding EM primary
    """
    clusts = form_clusters_new(dbscan)
    selected_voxels = []
    true_voxels = []

    if len(clusts) == 0:
        # assign everything to the first primary
        selected_voxels.append(np.arange(len(dbscan)))
        print('all clusters identified as Compton')
        return selected_voxels
    assigned_primaries = assign_primaries_unique(em_primaries, clusts,
                                                 types).astype(int)
    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]

            p = em_primaries[i]
            em_point = p[:3]

            # find primary cluster axis
            primary_points = dbscan[c][:, :3]
            primary_energies = energy_data[c][:, -1]
            if np.sum(primary_energies) == 0:
                selected_voxels.append(np.array([]))
                continue
            primary_center = np.average(primary_points.T,
                                        axis=1,
                                        weights=primary_energies)
            primary_axis = primary_center - em_point

            # find furthest particle from cone axis
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(np.cross(
                primary_points - primary_center, primary_points - em_point),
                                            axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections,
                                          slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction

            classified_indices = []
            for j in range(len(dbscan)):
                point = types[j]
                if point[-1] < 2:
                    continue
                coord = point[:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis),
                                 coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    return selected_voxels
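The inner loop above tests one voxel at a time against the cone; the same containment test can be vectorized. A sketch, assuming a unit-length cone_axis as produced by the normalization above (in_cone is a hypothetical helper, not project code):

import numpy as np

def in_cone(points, vertex, axis, cone_length, cone_slope):
    # keep points whose projection on the axis lies in [0, cone_length] and whose
    # distance from the axis is below cone_slope * projection
    rel = points - vertex
    axis_dist = rel @ axis
    radial = np.linalg.norm(np.cross(rel - axis, rel), axis=1)   # same two-point form as point_radius above
    return (axis_dist >= 0) & (axis_dist <= cone_length) & (radial < axis_dist * cone_slope)

pts = np.random.rand(200, 3) * 10
mask = in_cone(pts, vertex=np.zeros(3), axis=np.array([1., 0., 0.]),
               cone_length=8.0, cone_slope=0.3)
print(mask.sum())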
Example #12
def cluster(positions,
            em_primaries,
            params=[14.107334041, 52.94032412, 5.86322059, 1.01],
            inclusive=True):
    """
    positions: Nx3 array of EM shower voxel positions
    em_primaries: Nx3 array of EM primary positions
    
    if inclusive=True: returns a list of length len(em_primaries) containing np arrays, each of which contains the indices corresponding to the voxels in the cone of the corresponding EM primary; note that each voxel might thus have multiple labels
    if inclusive=False: returns a tuple (arr of length len(em_primaries), arr of length len(positions)) corresponding to EM primary labels and the voxel labels; note that each voxel has a unique label
    """
    length_factor = params[0]
    slope_percentile = params[1]
    slope_factor = params[2]

    dbscan = DBSCAN(eps=params[3],
                    min_samples=3).fit(positions).labels_.reshape(-1, 1)
    dbscan = np.concatenate((positions, np.zeros((len(positions), 1)), dbscan),
                            axis=1)

    clusts = form_clusters_new(dbscan)
    selected_voxels = []
    true_voxels = []

    if len(clusts) == 0:
        # assign everything to the first primary
        selected_voxels.append(np.arange(len(dbscan)))
        print('all clusters identified as Compton')
        return selected_voxels
    assigned_primaries = assign_primaries_unique(
        np.concatenate((em_primaries, np.zeros((len(em_primaries), 2))),
                       axis=1), clusts,
        np.concatenate((positions, np.zeros((len(positions), 2))),
                       axis=1)).astype(int)
    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]

            p = em_primaries[i]
            em_point = p[:3]

            # find primary cluster axis
            primary_points = dbscan[c][:, :3]
            primary_center = np.average(primary_points.T, axis=1)
            primary_axis = primary_center - em_point

            # find furthest particle from cone axis
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(np.cross(
                primary_points - primary_center, primary_points - em_point),
                                            axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections,
                                          slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction

            classified_indices = []
            for j in range(len(dbscan)):
                point = positions[j]
                coord = point[:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis),
                                 coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    # don't require that each voxel can only be in one group
    if inclusive:
        return selected_voxels

    # require each voxel can only be in one group (order groups in descending size to overwrite large groups)
    em_primary_labels = -np.ones(len(selected_voxels))
    node_labels = -np.ones(len(positions))
    lengths = []
    for group in selected_voxels:
        lengths.append(len(group))
    sorter = np.argsort(lengths)[::-1]
    for l in range(len(selected_voxels)):
        if len(selected_voxels[sorter[l]]) > 0:
            node_labels[selected_voxels[sorter[l]]] = l
            em_primary_labels[sorter[l]] = l

    labeled = np.where(node_labels != -1)
    unlabeled = np.where(node_labels == -1)
    if len(labeled[0]) > 5 and len(unlabeled[0]) > 0:
        classified_positions = positions[labeled]
        unclassified_positions = positions[unlabeled]
        cl = KNeighborsClassifier(n_neighbors=2)
        cl.fit(classified_positions, node_labels[labeled])
        node_labels[unlabeled] = cl.predict(unclassified_positions)

    return em_primary_labels, node_labels
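A hypothetical usage sketch of cluster() on synthetic positions (it still relies on the project helpers such as form_clusters_new and assign_primaries_unique being importable); with inclusive=False every voxel ends up with a single label, filled in by the k-NN step where needed:

import numpy as np

positions = np.concatenate([np.random.normal(10., 1.0, size=(200, 3)),
                            np.random.normal(30., 1.5, size=(300, 3))])
em_primaries = np.array([[10., 10., 10.],
                         [30., 30., 30.]])

em_primary_labels, voxel_labels = cluster(positions, em_primaries, inclusive=False)
print(np.unique(voxel_labels, return_counts=True))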
Example #13
    def forward(self, out, data0, data1):
        """
        out:
            dictionary output from GNN Model
            keys:
                'edge_pred': predicted edge weights from model forward
        data:
            data[0] - DBSCAN data
            data[1] - groups data
        """
        edge_pred = out[0][0]
        data0 = data0[0]
        data1 = data1[0]

        device = data0.device

        # first decide what true edges should be
        # need to form graph, then pass through GNN
        # clusts = form_clusters(data0)
        clusts = form_clusters_new(data0)

        # remove compton clusters
        # if no cluster fits this condition, return
        if self.remove_compton:
            selection = filter_compton(
                clusts, self.compton_thresh)  # non-compton looking clusters
            if not len(selection):
                total_loss = self.lossfn(edge_pred, edge_pred)
                return {'accuracy': 1., 'loss': total_loss}

            clusts = clusts[selection]

        # process group data
        # data_grp = process_group_data(data1, data0)
        data_grp = data1

        # form graph
        batch = get_cluster_batch(data0, clusts)
        edge_index = complete_graph(batch, device=device)

        if not edge_index.shape[0]:
            total_loss = self.lossfn(edge_pred, edge_pred)
            return {'accuracy': 0., 'loss': total_loss}
        group = get_cluster_label(data_grp, clusts)

        # determine true assignments
        edge_assn = edge_assignment(edge_index,
                                    batch,
                                    group,
                                    device=device,
                                    dtype=torch.long)

        edge_assn = edge_assn.view(-1)

        # total loss on batch
        total_loss = self.lossfn(edge_pred, edge_assn)

        # compute assigned clusters
        fe = edge_pred[:, 1] - edge_pred[:, 0]
        cs = assign_clusters_UF(edge_index, fe, len(clusts), thresh=0.0)

        ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(cs, clusts, group)

        edge_ct = edge_index.shape[1]

        return {
            'ARI': ari,
            'AMI': ami,
            'SBD': sbd,
            'purity': pur,
            'efficiency': eff,
            'accuracy': ari,
            'loss': total_loss,
            'edge_count': edge_ct
        }
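assign_clusters_UF above is project code; a toy union-find stand-in that groups clusters by unioning the endpoints of every edge whose score exceeds the threshold:

import numpy as np

def toy_union_find_groups(edge_index, scores, n, thresh=0.0):
    parent = list(range(n))

    def find(a):
        while parent[a] != a:
            parent[a] = parent[parent[a]]    # path halving
            a = parent[a]
        return a

    for (i, j), s in zip(edge_index.T, scores):
        if s > thresh:
            parent[find(int(i))] = find(int(j))

    return np.array([find(k) for k in range(n)])

edge_index = np.array([[0, 1, 2],
                       [1, 2, 3]])           # (2, E)
scores = np.array([1.5, -0.2, 0.7])
print(toy_union_find_groups(edge_index, scores, n=5))   # -> [1 1 3 3 4]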
Example #14
    def forward(self, edge_pred, data0, data1, data2):
        """
        edge_pred:
            predicted edge weights from model forward
        data:
            data[0] - 5 types data
            data[1] - groups data
            data[2] - primary data
        """
        data0 = data0[0]
        data1 = data1[0]
        data2 = data2[0]
        # first decide what true edges should be
        # need to form graph, then pass through GNN
        # clusts = form_clusters(data0)
        clusts = form_clusters_new(data0)

        # remove track-like particles
        # types = get_cluster_label(data0, clusts)
        # selection = types > 1 # 0 or 1 are track-like
        # clusts = clusts[selection]

        # remove compton clusters
        # if no cluster fits this condition, return
        selection = filter_compton(clusts)  # non-compton looking clusters
        if not len(selection):
            total_loss = self.lossfn(edge_pred, edge_pred)
            return {'accuracy': 1., 'loss_seg': total_loss}

        clusts = clusts[selection]

        # process group data
        # data_grp = process_group_data(data1, data0)
        data_grp = data1

        # form primary/secondary bipartite graph
        primaries = assign_primaries(data2, clusts, data0)
        batch = get_cluster_batch(data0, clusts)
        edge_index = primary_bipartite_incidence(batch, primaries)
        group = get_cluster_label(data_grp, clusts)

        primaries_true = assign_primaries(data2,
                                          clusts,
                                          data1,
                                          use_labels=True)
        print("primaries (est):  ", primaries)
        print("primaries (true): ", primaries_true)

        # determine true assignments
        edge_assn = edge_assignment(edge_index, batch, group, cuda=True)

        edge_assn = edge_assn.view(-1)
        edge_pred = edge_pred.view(-1)

        if self.balance:
            # weight edges so that 0/1 labels appear equally often
            ind0 = edge_assn == 0
            ind1 = edge_assn == 1
            # number in each class
            n0 = torch.sum(ind0).float()
            n1 = torch.sum(ind1).float()
            print("n0 = ", n0, " n1 = ", n1)
            # weights to balance classes
            w0 = n1 / (n0 + n1)
            w1 = n0 / (n0 + n1)
            print("w0 = ", w0, " w1 = ", w1)
            edge_assn[ind0] = w0 * edge_assn[ind0]
            edge_assn[ind1] = w1 * edge_assn[ind1]
            edge_pred = edge_pred.clone()
            edge_pred[ind0] = w0 * edge_pred[ind0]
            edge_pred[ind1] = w1 * edge_pred[ind1]

        total_loss = self.lossfn(edge_pred, edge_assn)

        # compute accuracy of assignment
        # need to multiply by batch size to be accurate
        total_acc = (np.max(batch) + 1) * torch.tensor(
            secondary_matching_vox_efficiency(edge_index, edge_assn, edge_pred,
                                              primaries, clusts, len(clusts)))

        return {'accuracy': total_acc, 'loss_seg': total_loss}
Example #15
def find_shower_cone(dbscan,
                     groups,
                     em_primaries,
                     energy_data,
                     types,
                     length_factor=14.107334041,
                     slope_percentile=52.94032412,
                     slope_factor=5.86322059,
                     return_truth=False,
                     verbose=False):
    """
    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    groups: data parsed from "group_label": ["parse_cluster3d_clean", "cluster3d_mcst", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]
    types: (???) Fivetypes label Tensor (N x 5)

    returns a list of length len(em_primaries) containing np arrays, each of which contains the indices corresponding to the voxels in the cone of the corresponding EM primary
    """

    clusts = form_clusters_new(dbscan)
    assigned_primaries = assign_primaries_unique(em_primaries,
                                                 clusts,
                                                 groups,
                                                 use_labels=True).astype(int)
    selected_voxels = []
    true_voxels = []
    cone_params_list = []
    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]

            if return_truth:
                group_ids = np.unique(groups[c][:, -1])
                type_id = -1
                for g in groups[c]:
                    for j in range(len(types)):
                        if np.array_equal(g[:3], types[j][:3]):
                            type_id = types[j][-1]
                            break
                    if type_id != -1:
                        break
                true_indices = np.where(
                    np.logical_and(np.isin(groups[:, -1], group_ids),
                                   types[:, -1] >= 2))[0]
                true_voxels.append(true_indices)

            p = em_primaries[i]
            em_point = p[:3]

            # find primary cluster axis
            primary_points = dbscan[c][:, :3]
            primary_center = np.average(primary_points.T, axis=1)
            primary_axis = primary_center - em_point

            # find furthest particle from cone axis (???)
            # COMMENT: Maybe not the furthest particle? This seems to select the slope by percentile.
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(np.cross(
                primary_points - primary_center, primary_points - em_point),
                                            axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections,
                                          slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction

            cone_params = (cone_length, cone_slope, cone_vertex, cone_axis)
            cone_params_list.append(cone_params)

            classified_indices = []
            # Should be able to vectorize operation.
            for j in range(len(dbscan)):
                point = types[j]
                if point[-1] < 2:
                    # ??? Why not != 2?
                    continue
                coord = point[:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis),
                                 coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    if return_truth:
        return true_voxels, selected_voxels, cone_params_list
    else:
        return selected_voxels, cone_params_list