def find_shower_gnn(dbscan, groups, em_primaries, energy_data, types,
                    model_name, model_checkpoint, gpu_ind=0, verbose=False):
    """
    NOTE: THIS IS PROBABLY BROKEN; it was written right after the first pi0 workshop

    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    groups: data parsed from "group_label": ["parse_cluster3d_clean", "cluster3d_mcst", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]

    returns a list of length len(em_primaries) containing np arrays, each of which
    contains the indices corresponding to the voxels in the predicted shower of the
    corresponding EM primary
    """
    event_data = [torch.tensor(dbscan), torch.tensor(em_primaries)]

    # set up the attention model on the requested GPU (was hard-coded to device 0)
    torch.cuda.set_device(gpu_ind)
    model_attn = DataParallel(BasicAttentionModel(model_name),
                              device_ids=[gpu_ind], dense=False)
    model_attn.load_state_dict(
        torch.load(model_checkpoint, map_location='cuda:' + str(gpu_ind))['state_dict'])
    model_attn.eval().cuda()

    data_grp = process_group_data(torch.tensor(groups), torch.tensor(dbscan))

    clusts = form_clusters_new(dbscan)
    selection = filter_compton(clusts)  # non-compton looking clusters
    clusts = clusts[selection]

    full_primaries = np.array(assign_primaries3(em_primaries, clusts, groups))
    primaries = assign_primaries(torch.tensor(em_primaries), clusts, torch.tensor(groups))
    batch = get_cluster_batch(dbscan, clusts)
    edge_index = primary_bipartite_incidence(batch, primaries, cuda=True)

    if len(edge_index) == 0:
        # no secondary clusters: each matched primary keeps only its own cluster
        selected_voxels = []
        for p in full_primaries.astype(int):
            if p == -1:
                selected_voxels.append(np.array([]))
            else:
                selected_voxels.append(clusts[p])
        return selected_voxels

    n = len(clusts)
    mask = np.array([(i not in primaries) for i in range(n)])
    others = np.arange(n)[mask]

    pred_labels = model_attn(event_data)
    pred_nodes = assign_clusters(edge_index, pred_labels, primaries, others, n)

    selected_voxels = []
    for p in full_primaries:
        if p == -1:
            selected_voxels.append(np.array([]))
        else:
            selected_clusts = clusts[np.where(pred_nodes == p)[0]]
            selected_voxels.append(np.concatenate(selected_clusts))
    return selected_voxels
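# --------------------------------------------------------------------------
# Illustrative sketch (not part of the original module): the final gather in
# find_shower_gnn turns per-cluster predictions into per-primary voxel lists
# via np.where(pred_nodes == p). A self-contained toy version of that step:
import numpy as np

# toy clusters (arrays of voxel indices) and a predicted primary id per cluster
toy_clusts = np.array([np.array([0, 1]), np.array([2]), np.array([3, 4])], dtype=object)
toy_pred_nodes = np.array([0, 2, 0])      # cluster -> index of its matched primary
toy_full_primaries = np.array([0, 2, -1])

toy_selected = []
for p in toy_full_primaries:
    if p == -1:
        toy_selected.append(np.array([]))  # unmatched primary gets no voxels
    else:
        toy_selected.append(np.concatenate(toy_clusts[np.where(toy_pred_nodes == p)[0]]))
# toy_selected == [array([0, 1, 3, 4]), array([2]), array([])]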
def forward(self, data):
    """
    inputs data:
        data[0] - dbscan data
        data[1] - primary data
    """
    # need to form graph, then pass through GNN
    clusts = form_clusters_new(data[0])

    # remove track-like particles
    #types = get_cluster_label(data[0], clusts)
    #selection = types > 1 # 0 or 1 are track-like
    #clusts = clusts[selection]

    # remove compton clusters
    # if no cluster fits this condition, return an empty prediction
    selection = filter_compton(clusts)  # non-compton looking clusters
    if not len(selection):
        e = torch.tensor([], requires_grad=True)
        if data[0].is_cuda:
            e = e.cuda()
        return e

    clusts = clusts[selection]

    # process group data
    # data_grp = process_group_data(data[1], data[0])
    # data_grp = data[1]

    # form primary/secondary bipartite graph
    primaries = assign_primaries(data[1], clusts, data[0])
    batch = get_cluster_batch(data[0], clusts)
    edge_index = primary_bipartite_incidence(batch, primaries, cuda=True)

    # obtain vertex features
    x = cluster_vtx_features(data[0], clusts, cuda=True)
    # x = cluster_vtx_features_old(data[0], clusts, cuda=True)

    # obtain edge features
    e = cluster_edge_features(data[0], clusts, edge_index, cuda=True)

    # go through attention layers
    x = self.attn1(x, edge_index)
    x = self.attn2(x, edge_index)
    x = self.attn3(x, edge_index)

    xbatch = torch.tensor(batch).cuda()
    x, e, u = self.edge_predictor(x, edge_index, e, u=None, batch=xbatch)

    print("max edge weight: ", torch.max(e.view(-1)))
    print("min edge weight: ", torch.min(e.view(-1)))

    return e
def forward(self, data):
    """
    Input:
        data[0]: (Nx5) Cluster tensor with row (x, y, z, batch_id, cluster_id)
    Output:
        dictionary, with
            'node_pred': torch.tensor with node prediction weights
    """
    # Get device
    cluster_label = data[0]
    device = cluster_label.device

    # Find index of points that belong to the same EM clusters
    clusts = form_clusters_new(cluster_label)

    # If requested, remove clusters below a certain size threshold
    if self.remove_compton:
        selection = np.where(filter_compton(clusts, self.compton_thresh))[0]
        if not len(selection):
            return self.default_return(device)
        clusts = clusts[selection]

    # Get the cluster ids of each processed cluster
    clust_ids = get_cluster_label(cluster_label, clusts)

    # Get the batch ids of each cluster
    batch_ids = get_cluster_batch(cluster_label, clusts)

    # Form a complete graph (TODO: should add options for other structures)
    edge_index = complete_graph(batch_ids, device=device)
    if not edge_index.shape[0]:
        return self.default_return(device)

    # Obtain vertex features
    x = cluster_vtx_features(cluster_label, clusts, device=device)

    # Obtain edge features
    e = cluster_edge_features(cluster_label, clusts, edge_index, device=device)

    # Convert the batch IDs to a torch tensor to pass to the model
    xbatch = torch.tensor(batch_ids).to(device)

    # Pass through the model, get output
    out = self.node_predictor(x, edge_index, e, xbatch)

    return {
        **out,
        'clust_ids': [torch.tensor(clust_ids)],
        'batch_ids': [torch.tensor(batch_ids)],
        'edge_index': [edge_index]
    }
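# --------------------------------------------------------------------------
# Assumption-labeled sketch (not the repo's implementation): complete_graph
# presumably returns a (2, E) edge_index connecting every pair of clusters
# that share a batch id. A minimal stand-in that behaves that way:
import numpy as np
import torch

def complete_graph_sketch(batch_ids, device=None):
    """(2, E) long tensor of all intra-batch cluster pairs."""
    edges = [[i, j]
             for i in range(len(batch_ids))
             for j in range(i + 1, len(batch_ids))
             if batch_ids[i] == batch_ids[j]]
    return torch.tensor(np.array(edges).T, dtype=torch.long, device=device)

# complete_graph_sketch(np.array([0, 0, 0, 1, 1])) ->
# tensor([[0, 0, 1, 3],
#         [1, 2, 2, 4]])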
def get_lifetimes(data):
    """
    data: np array of DBSCAN-parsed data with shape (N, 5)

    returns: np array of shape (N,) with the label corresponding to the lifetime
    of the voxel. The lifetime is infinity if a voxel is outside a cluster or in
    a compton scatter.
    """
    all_lifetimes = np.inf * np.ones(len(data))
    clusts = form_clusters_new(data)

    # remove compton clusters
    selection = filter_compton(clusts)
    clusts = clusts[selection]
    non_compton = np.concatenate(clusts)

    cluster_features = get_cluster_features(data, clusts)
    for i in range(len(clusts)):
        clust = clusts[i]
        mean = cluster_features[:, :3][i]
        direction = cluster_features[:, -3:][i]
        coords = data[clust][:, :3]
        f = np.dot(coords - mean, direction)

        # collect edges between voxels that fall in the same (2*box_dim)^3 box
        # NOTE: np.searchsorted below is only valid if coords is sorted along z
        box_dim = 1
        edges = []
        for k in range(len(coords)):
            x, y, z = coords[k][:3]
            region = coords
            indices = np.arange(len(coords))
            indices = indices[np.searchsorted(region[:, 2], z - box_dim):]
            region = coords[indices]
            indices = indices[:np.searchsorted(region[:, 2], z + box_dim, side='right')]
            region = coords[indices]
            indices = indices[np.where((region[:, 1] >= y - box_dim)
                                       & (region[:, 1] <= y + box_dim)
                                       & (region[:, 0] >= x - box_dim)
                                       & (region[:, 0] <= x + box_dim))]
            for j in indices:
                if k != j:
                    entry = sorted((k, j))
                    if entry not in edges:
                        edges.append(entry)
        edges = np.array(edges)

        # persistence lifetimes from the merge diagram of f over the edge graph
        births, deaths, edge_list = merge_diagram(f, edges)
        lifetimes = deaths - births
        all_lifetimes[clust] = lifetimes
    return all_lifetimes
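# --------------------------------------------------------------------------
# Standalone demonstration (illustrative only) of the windowing trick used in
# get_lifetimes: np.searchsorted narrows candidates to a z-slab before the
# x/y box test. This is only valid when the coordinates are sorted by z.
import numpy as np

coords = np.array([[0., 0., 0.], [0.5, 0.5, 0.5], [3., 3., 3.], [0.2, 0.1, 0.9]])
coords = coords[np.argsort(coords[:, 2])]    # searchsorted requires sorted z
box_dim = 1

x, y, z = coords[0]
lo = np.searchsorted(coords[:, 2], z - box_dim)
hi = np.searchsorted(coords[:, 2], z + box_dim, side='right')
window = coords[lo:hi]                       # only points in the z-slab
near = window[(np.abs(window[:, 0] - x) <= box_dim)
              & (np.abs(window[:, 1] - y) <= box_dim)]
# near now holds the neighbors of coords[0] within the (2*box_dim)^3 box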
def forward(self, data):
    """
    inputs data:
        data[0] - dbscan data
    output:
        dictionary, with
            'edge_pred': torch.tensor with edge prediction weights
    """
    # get device
    device = data[0].device

    # need to form graph, then pass through GNN
    clusts = form_clusters_new(data[0])

    # remove compton clusters
    # if no cluster fits this condition, return an empty prediction
    if self.remove_compton:
        selection = filter_compton(clusts, self.compton_thresh)  # non-compton looking clusters
        if not len(selection):
            e = torch.tensor([], requires_grad=True).to(device)
            return {'edge_pred': [e]}
        clusts = clusts[selection]

    # form graph
    batch = get_cluster_batch(data[0], clusts)
    edge_index = complete_graph(batch, device=device)
    if not edge_index.shape[0]:
        e = torch.tensor([], requires_grad=True).to(device)
        return {'edge_pred': [e]}

    # obtain vertex directions
    x = cluster_vtx_dirs(data[0], clusts, device=device)

    # obtain edge directions
    e = cluster_edge_dirs(data[0], clusts, edge_index, device=device)

    # get x batch
    xbatch = torch.tensor(batch).to(device)

    # get output
    outdict = self.edge_predictor(x, edge_index, e, xbatch)

    return outdict
def forward(self, data):
    """
    inputs data:
        data[0] - dbscan data
    """
    # need to form graph, then pass through GNN
    clusts = form_clusters_new(data[0])

    # remove compton clusters (should we?)
    # if no cluster fits this condition, return an empty prediction
    selection = filter_compton(clusts)  # non-compton looking clusters
    if not len(selection):
        x = torch.tensor([], requires_grad=True)
        if data[0].is_cuda:
            x = x.cuda()
        return x

    clusts = clusts[selection]

    # form complete graph
    batch = get_cluster_batch(data[0], clusts)
    edge_index = complete_graph(batch, cuda=True)
    if not len(edge_index):
        x = torch.tensor([], requires_grad=True)
        if data[0].is_cuda:
            x = x.cuda()
        return x

    batch = torch.tensor(batch)
    if data[0].is_cuda:
        batch = batch.cuda()

    # obtain vertex features
    #x = cluster_vtx_features(data[0], clusts, cuda=True)
    x = cluster_vtx_features_old(data[0], clusts, cuda=True)

    # go through layers
    x = self.econv1(x, edge_index)
    x = self.econv2(x, edge_index)
    x = self.econv3(x, edge_index)
    x, e, u = self.predictor(x, edge_index, edge_attr=None, u=None, batch=batch)

    return F.log_softmax(x, dim=1)
def forward(self, node_pred, data0, data1):
    """
    node_pred: predicted node type from model forward
    data:
        data[0] - 5 types data
        data[1] - primary data
    """
    data0 = data0[0]
    data1 = data1[0]

    # first decide what true edges should be
    # need to form graph, then pass through GNN
    # clusts = form_clusters(data0)
    clusts = form_clusters_new(data0)

    # remove track-like particles
    # types = get_cluster_label(data0, clusts)
    # selection = types > 1 # 0 or 1 are track-like
    # clusts = clusts[selection]

    # remove compton clusters
    # if no cluster fits this condition, return
    selection = filter_compton(clusts)  # non-compton looking clusters
    if not len(selection):
        total_loss = self.lossfn(node_pred, node_pred)
        return {'accuracy': 1., 'loss_seg': total_loss}

    clusts = clusts[selection]

    # get the true node labels: 1 for primary clusters, 0 otherwise
    primaries = assign_primaries(data1, clusts, data0)
    node_assn = torch.tensor([int(i in primaries) for i in range(len(clusts))])
    if node_pred.is_cuda:
        node_assn = node_assn.cuda()
    node_assn = node_assn.view(-1)

    weights = torch.tensor([1., 1.])
    if node_pred.is_cuda:
        weights = weights.cuda()
    if self.balance:
        ind0 = node_assn == 0
        ind1 = node_assn == 1
        # number in each class
        n0 = torch.sum(ind0).float()
        n1 = torch.sum(ind1).float()
        # inverse-frequency weights so both classes contribute equally
        weights[0] = n1 / (n0 + n1)
        weights[1] = n0 / (n0 + n1)
        print('class sizes', n0, n1)

    print('weights', weights)
    total_loss = F.nll_loss(node_pred, node_assn, weight=weights)

    # compute accuracy of assignment, weighted by voxel counts
    # (node_pred holds log-probabilities, so the predicted class is the argmax)
    preds = torch.argmax(node_pred, dim=1)
    tot_vox = np.sum([len(c) for c in clusts])
    int_vox = np.sum([len(clusts[i]) for i in range(len(clusts))
                      if node_assn[i] == preds[i]])
    total_acc = int_vox * 1.0 / tot_vox

    return {'accuracy': total_acc, 'loss_seg': total_loss}
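# --------------------------------------------------------------------------
# Self-contained illustration of the balancing scheme above: inverse-frequency
# class weights fed to F.nll_loss, with predictions taken as the argmax of the
# log-probabilities. Toy tensors only; nothing here is the repo's API.
import torch
import torch.nn.functional as F

log_probs = F.log_softmax(torch.randn(6, 2), dim=1)  # scores for 6 nodes
labels = torch.tensor([0, 0, 0, 0, 1, 1])            # imbalanced: 4 vs 2

n0 = (labels == 0).sum().float()
n1 = (labels == 1).sum().float()
weights = torch.stack([n1, n0]) / (n0 + n1)          # [1/3, 2/3]

loss = F.nll_loss(log_probs, labels, weight=weights)
preds = log_probs.argmax(dim=1)  # predicted class = highest log-probability
acc = (preds == labels).float().mean()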
def forward(self, data):
    """
    input data:
        data[0] - dbscan data
        data[1] - primary data
    output data:
        dictionary with following keys:
            edges     : list of edge_index tensors used for edge prediction
            edge_pred : list of torch tensors with edge prediction weights
            matched   : numpy array of group for each cluster (identified by primary index)
            n_iter    : number of iterations taken
        each list is of length k, where k is the number of times the iterative
        network is applied
    """
    # need to form graph, then pass through GNN
    clusts = form_clusters_new(data[0])

    # remove compton clusters
    # if no cluster fits this condition, return an empty prediction
    if self.remove_compton:
        selection = filter_compton(clusts, self.compton_thresh)  # non-compton looking clusters
        if not len(selection):
            e = torch.tensor([], requires_grad=True)
            if data[0].is_cuda:
                e = e.cuda()
            return e
        clusts = clusts[selection]

    batch = get_cluster_batch(data[0], clusts)
    # get x batch
    xbatch = torch.tensor(batch).cuda()

    primaries = assign_primaries(data[1], clusts, data[0], max_dist=self.pmd)

    # keep track of which clusters are matched; -1 means not matched
    matched = np.repeat(-1, len(clusts))
    matched[primaries] = primaries

    edges = []
    edge_pred = []

    counter = 0
    found_match = True
    while (-1 in matched) and (counter < self.maxiter) and found_match:
        # continue until either:
        # 1. everything is matched
        # 2. we have exceeded the max number of iterations
        # 3. we didn't find any matches
        counter = counter + 1

        # get matched indices
        assigned = np.where(matched > -1)[0]
        others = np.where(matched == -1)[0]

        edge_index = primary_bipartite_incidence(batch, assigned, cuda=True)
        # check if there are any edges to predict
        # also batch norm will fail on only 1 edge, so break if this is the case
        if edge_index.shape[1] < 2:
            counter -= 1
            break

        # obtain vertex features
        x = cluster_vtx_features(data[0], clusts, cuda=True)
        # obtain edge features
        e = cluster_edge_features(data[0], clusts, edge_index, cuda=True)

        out = self.edge_predictor(x, edge_index, e, xbatch)

        # predictions for this edge set
        edge_pred.append(out[0][0])
        edges.append(edge_index)

        matched, found_match = self.assign_clusters(
            edge_index,
            out[0][0][:, 1] - out[0][0][:, 0],
            others, matched, self.thresh)

    matched = torch.tensor(matched)
    counter = torch.tensor([counter])
    if data[0].is_cuda:
        matched = matched.cuda()
        counter = counter.cuda()

    return {
        'edges': [edges],
        'edge_pred': [edge_pred],
        'matched': [matched],
        'counter': [counter]
    }
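# --------------------------------------------------------------------------
# Standalone sketch of the matched/others bookkeeping in the loop above, with
# a toy distance score standing in for the GNN edge weights. All names here
# are illustrative; only the control flow mirrors the model.
import numpy as np

pos = np.array([0.0, 1.0, 2.5, 9.0])   # toy 1-D "clusters"
matched = np.repeat(-1, len(pos))      # -1 means not matched
matched[0] = 0                         # cluster 0 is the primary

thresh, maxiter = -4.0, 5
counter, found_match = 0, True
while (-1 in matched) and (counter < maxiter) and found_match:
    counter += 1
    assigned = np.where(matched > -1)[0]
    others = np.where(matched == -1)[0]
    found_match = False
    for o in others:
        scores = -np.abs(pos[assigned] - pos[o])  # stand-in for edge weights
        best = np.argmax(scores)
        if scores[best] > thresh:
            matched[o] = matched[assigned[best]]
            found_match = True
# matched -> [0, 0, 0, -1]: the far cluster never clears the threshold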
def forward(self, out, clusters, groups, primary):
    """
    out:
        array output from the DataParallel gather function
        out[0] - n_gpus tensors of edge indexes
        out[1] - n_gpus tensors of predicted edge weights from model forward
        out[2] - n_gpus arrays of group ids for each cluster
        out[3] - n_gpus number of iterations
    data:
        cluster_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
        group_labels   - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id)
        em_primaries   - n_gpus tensor of (x, y, z) coordinates of origins of EM primaries
    """
    total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
    ngpus = len(clusters)
    for i in range(ngpus):
        data0 = clusters[i]
        data1 = groups[i]
        data2 = primary[i]

        clusts = form_clusters_new(data0)

        # remove compton clusters
        # if no cluster fits this condition, skip this GPU's batch
        if self.remove_compton:
            selection = filter_compton(clusts)  # non-compton looking clusters
            if not len(selection):
                edge_pred = out[1][i]
                total_loss += self.lossfn(edge_pred, edge_pred)
                total_acc += 1.
                continue
            clusts = clusts[selection]

        # process group data
        data_grp = data1

        # form primary/secondary bipartite graph
        primaries = assign_primaries(data2, clusts, data0)
        batch = get_cluster_batch(data0, clusts)
        # edge_index = primary_bipartite_incidence(batch, primaries)
        group = get_cluster_label(data_grp, clusts)

        primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
        primary_fdr, primary_tdr, primary_acc = analyze_primaries(primaries, primaries_true)
        total_primary_fdr += primary_fdr
        total_primary_acc += primary_acc

        niter = out[3][i][0]  # number of iterations
        total_iter += niter
        for j in range(niter):
            # determine true assignments
            edge_index = out[0][i][j]
            edge_assn = edge_assignment(edge_index, batch, group, cuda=True)

            edge_pred = out[1][i][j]

            edge_assn = edge_assn.view(-1)
            edge_pred = edge_pred.view(-1)

            if self.balance:
                edge_assn, edge_pred = self.balance_classes(edge_assn, edge_pred)

            total_loss += self.lossfn(edge_pred, edge_assn)

        # compute accuracy of assignment
        # need to multiply by batch size to be accurate
        #total_acc = (np.max(batch) + 1) * torch.tensor(secondary_matching_vox_efficiency(edge_index, edge_assn, edge_pred, primaries, clusts, len(clusts)))
        # use out['matched']
        total_acc += torch.tensor(
            secondary_matching_vox_efficiency2(out[2][i], group, primaries, clusts))

    return {
        'primary_fdr': total_primary_fdr / ngpus,
        'primary_acc': total_primary_acc / ngpus,
        'accuracy': total_acc / ngpus,
        'loss': total_loss / ngpus,
        'n_iter': total_iter
    }
def forward(self, out, clusters, groups, primary):
    """
    out:
        array output from the DataParallel gather function
        out[0] - n_gpus tensors of edge indexes
        out[1] - n_gpus tensors of predicted edge weights from model forward
        out[2] - n_gpus arrays of group ids for each cluster
        out[3] - n_gpus number of iterations
    data:
        cluster_labels - n_gpus Nx5 tensors of (x, y, z, batch_id, cluster_id)
        group_labels   - n_gpus Nx5 tensors of (x, y, z, batch_id, group_id)
        em_primaries   - n_gpus tensor of (x, y, z) coordinates of origins of EM primaries
    """
    total_loss, total_acc, total_primary_fdr, total_primary_acc, total_iter = 0., 0., 0., 0., 0
    total_ari, total_ami, total_sbd, total_pur, total_eff = 0., 0., 0., 0., 0.
    ngpus = len(clusters)
    for i in range(ngpus):
        data0 = clusters[i]
        data1 = groups[i]
        data2 = primary[i]

        clusts = form_clusters_new(data0)

        # remove compton clusters
        # if no cluster fits this condition, skip this GPU's batch
        if self.remove_compton:
            selection = filter_compton(clusts)  # non-compton looking clusters
            if not len(selection):
                edge_pred = out[1][i][0]
                total_loss += self.lossfn(edge_pred, edge_pred)
                total_acc += 1.
                continue
            clusts = clusts[selection]

        # process group data
        data_grp = data1

        # form primary/secondary bipartite graph
        primaries = assign_primaries(data2, clusts, data0)
        batch = get_cluster_batch(data0, clusts)
        # edge_index = primary_bipartite_incidence(batch, primaries)
        group = get_cluster_label(data_grp, clusts)

        primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
        primary_fdr, primary_tdr, primary_acc = analyze_primaries(primaries, primaries_true)
        total_primary_fdr += primary_fdr
        total_primary_acc += primary_acc

        niter = out[3][i][0]  # number of iterations
        total_iter += niter

        # loop over iterations and add the loss at each iteration
        for j in range(niter):
            # determine true assignments
            edge_index = out[0][i][j]
            edge_assn = edge_assignment(edge_index, batch, group, cuda=True, dtype=torch.long)

            # get edge predictions (2 channels)
            edge_pred = out[1][i][j]

            edge_assn = edge_assn.view(-1)
            total_loss += self.lossfn(edge_pred, edge_assn)

        # compute accuracy of assignment
        total_acc += secondary_matching_vox_efficiency2(out[2][i], group, primaries, clusts)

        # get clustering metrics
        ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(
            out[2][i].cpu().numpy(), clusts, group)
        total_ari += ari
        total_ami += ami
        total_sbd += sbd
        total_pur += pur
        total_eff += eff

    return {
        'primary_fdr': total_primary_fdr / ngpus,
        'primary_acc': total_primary_acc / ngpus,
        'ARI': total_ari / ngpus,
        'AMI': total_ami / ngpus,
        'SBD': total_sbd / ngpus,
        'purity': total_pur / ngpus,
        'efficiency': total_eff / ngpus,
        'accuracy': total_acc / ngpus,
        'loss': total_loss / ngpus,
        'n_iter': total_iter
    }
def find_shower_cone(dbscan, em_primaries, energy_data, types,
                     length_factor=14.107334041,
                     slope_percentile=52.94032412,
                     slope_factor=5.86322059):
    """
    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]

    returns a list of length len(em_primaries) containing np arrays, each of which
    contains the indices corresponding to the voxels in the cone of the corresponding EM primary
    """
    clusts = form_clusters_new(dbscan)

    selected_voxels = []

    if len(clusts) == 0:
        # assign everything to the first primary
        selected_voxels.append(np.arange(len(dbscan)))
        print('all clusters identified as Compton')
        return selected_voxels

    assigned_primaries = assign_primaries_unique(em_primaries, clusts, types).astype(int)
    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]
            p = em_primaries[i]
            em_point = p[:3]

            # find the primary cluster axis (energy-weighted centroid minus EM point)
            primary_points = dbscan[c][:, :3]
            primary_energies = energy_data[c][:, -1]
            if np.sum(primary_energies) == 0:
                selected_voxels.append(np.array([]))
                continue

            primary_center = np.average(primary_points.T, axis=1, weights=primary_energies)
            primary_axis = primary_center - em_point

            # pick the cone slope from a percentile of the voxel spread about the axis
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(
                np.cross(primary_points - primary_center, primary_points - em_point),
                axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections, slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction

            classified_indices = []
            for j in range(len(dbscan)):
                point = types[j]
                if point[-1] < 2:
                    continue
                coord = point[:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis), coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    return selected_voxels
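# --------------------------------------------------------------------------
# Self-contained version of the cone membership test used above: project onto
# the axis and compare the perpendicular distance to the cone radius at that
# depth. Equivalent to the cross-product form in the loop, since cone_axis is
# a unit vector. Illustrative helper, not the repo's API.
import numpy as np

def in_cone(points, vertex, axis, length, slope):
    """Boolean mask of points inside a cone with apex at vertex and unit axis."""
    rel = points - vertex
    proj = rel @ axis                                   # distance along the axis
    perp = np.linalg.norm(np.cross(rel, axis), axis=1)  # distance to the axis
    return (proj >= 0) & (proj <= length) & (perp < proj * slope)

pts = np.array([[0., 0., 1.], [0., 2., 1.], [0., 0., -1.]])
mask = in_cone(pts, vertex=np.zeros(3), axis=np.array([0., 0., 1.]),
               length=5.0, slope=0.5)
# mask -> [True, False, False]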
def cluster(positions, em_primaries,
            params=[14.107334041, 52.94032412, 5.86322059, 1.01],
            inclusive=True):
    """
    positions: Nx3 array of EM shower voxel positions
    em_primaries: Nx3 array of EM primary positions

    if inclusive=True:
        returns a list of length len(em_primaries) containing np arrays, each of which
        contains the indices corresponding to the voxels in the cone of the corresponding
        EM primary; note that each voxel might thus have multiple labels
    if inclusive=False:
        returns a tuple (arr of length len(em_primaries), arr of length len(positions))
        corresponding to EM primary labels and the voxel labels; note that each voxel
        has a unique label
    """
    length_factor = params[0]
    slope_percentile = params[1]
    slope_factor = params[2]

    dbscan = DBSCAN(eps=params[3], min_samples=3).fit(positions).labels_.reshape(-1, 1)
    dbscan = np.concatenate((positions, np.zeros((len(positions), 1)), dbscan), axis=1)
    clusts = form_clusters_new(dbscan)

    selected_voxels = []

    if len(clusts) == 0:
        # assign everything to the first primary
        selected_voxels.append(np.arange(len(dbscan)))
        print('all clusters identified as Compton')
        return selected_voxels

    assigned_primaries = assign_primaries_unique(
        np.concatenate((em_primaries, np.zeros((len(em_primaries), 2))), axis=1),
        clusts,
        np.concatenate((positions, np.zeros((len(positions), 2))), axis=1)).astype(int)

    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]
            p = em_primaries[i]
            em_point = p[:3]

            # find the primary cluster axis (centroid minus EM point)
            primary_points = dbscan[c][:, :3]
            primary_center = np.average(primary_points.T, axis=1)
            primary_axis = primary_center - em_point

            # pick the cone slope from a percentile of the voxel spread about the axis
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(
                np.cross(primary_points - primary_center, primary_points - em_point),
                axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections, slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction

            classified_indices = []
            for j in range(len(dbscan)):
                coord = positions[j][:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis), coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    # don't require that each voxel can only be in one group
    if inclusive:
        return selected_voxels

    # require each voxel to be in only one group
    # (assign groups in descending size so that smaller groups overwrite larger ones)
    em_primary_labels = -np.ones(len(selected_voxels))
    node_labels = -np.ones(len(positions))
    lengths = [len(group) for group in selected_voxels]
    sorter = np.argsort(lengths)[::-1]
    for l in range(len(selected_voxels)):
        if len(selected_voxels[sorter[l]]) > 0:
            node_labels[selected_voxels[sorter[l]]] = l
            em_primary_labels[sorter[l]] = l

    # fill in voxels that no cone claimed using their nearest labeled neighbors
    labeled = np.where(node_labels != -1)
    unlabeled = np.where(node_labels == -1)
    if len(labeled[0]) > 5 and len(unlabeled[0]) > 0:
        classified_positions = positions[labeled]
        unclassified_positions = positions[unlabeled]
        cl = KNeighborsClassifier(n_neighbors=2)
        cl.fit(classified_positions, node_labels[labeled])
        node_labels[unlabeled] = cl.predict(unclassified_positions)

    return em_primary_labels, node_labels
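# --------------------------------------------------------------------------
# Self-contained illustration of the KNN fill-in at the end of cluster():
# voxels left unlabeled by every cone inherit the label of their nearest
# labeled neighbors. Toy data only.
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

rng = np.random.default_rng(0)
positions = rng.random((50, 3))
labels = -np.ones(50)
labels[:40] = rng.integers(0, 3, 40)   # 40 labeled voxels, 10 unlabeled

labeled = np.where(labels != -1)
unlabeled = np.where(labels == -1)
cl = KNeighborsClassifier(n_neighbors=2)
cl.fit(positions[labeled], labels[labeled])
labels[unlabeled] = cl.predict(positions[unlabeled])
# every voxel now carries one of the cone labels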
def forward(self, out, data0, data1):
    """
    out:
        dictionary output from GNN Model
        keys:
            'edge_pred': predicted edge weights from model forward
    data:
        data[0] - DBSCAN data
        data[1] - groups data
    """
    edge_pred = out[0][0]
    data0 = data0[0]
    data1 = data1[0]

    device = data0.device

    # first decide what true edges should be
    # need to form graph, then pass through GNN
    # clusts = form_clusters(data0)
    clusts = form_clusters_new(data0)

    # remove compton clusters
    # if no cluster fits this condition, return
    if self.remove_compton:
        selection = filter_compton(clusts, self.compton_thresh)  # non-compton looking clusters
        if not len(selection):
            total_loss = self.lossfn(edge_pred, edge_pred)
            return {'accuracy': 1., 'loss': total_loss}
        clusts = clusts[selection]

    # process group data
    # data_grp = process_group_data(data1, data0)
    data_grp = data1

    # form graph
    batch = get_cluster_batch(data0, clusts)
    edge_index = complete_graph(batch, device=device)
    if not edge_index.shape[0]:
        total_loss = self.lossfn(edge_pred, edge_pred)
        return {'accuracy': 0., 'loss': total_loss}

    group = get_cluster_label(data_grp, clusts)

    # determine true assignments
    edge_assn = edge_assignment(edge_index, batch, group, device=device, dtype=torch.long)
    edge_assn = edge_assn.view(-1)

    # total loss on batch
    total_loss = self.lossfn(edge_pred, edge_assn)

    # compute assigned clusters from the per-edge "on" score
    # (assumes edge_pred has one row per edge and two channels, as elsewhere)
    fe = edge_pred[:, 1] - edge_pred[:, 0]
    cs = assign_clusters_UF(edge_index, fe, len(clusts), thresh=0.0)

    ari, ami, sbd, pur, eff = DBSCAN_cluster_metrics2(cs, clusts, group)

    edge_ct = edge_index.shape[1]

    return {
        'ARI': ari,
        'AMI': ami,
        'SBD': sbd,
        'purity': pur,
        'efficiency': eff,
        'accuracy': ari,
        'loss': total_loss,
        'edge_count': edge_ct
    }
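# --------------------------------------------------------------------------
# Assumption-labeled illustration: ARI and AMI, as computed inside
# DBSCAN_cluster_metrics2, presumably match the standard scikit-learn
# definitions. A standalone comparison of a predicted grouping with truth:
import numpy as np
from sklearn.metrics import adjusted_rand_score, adjusted_mutual_info_score

true_group = np.array([0, 0, 1, 1, 2, 2])   # true group id per cluster
pred_group = np.array([0, 0, 1, 2, 2, 2])   # predicted group id per cluster

ari = adjusted_rand_score(true_group, pred_group)
ami = adjusted_mutual_info_score(true_group, pred_group)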
def forward(self, edge_pred, data0, data1, data2):
    """
    edge_pred: predicted edge weights from model forward
    data:
        data[0] - 5 types data
        data[1] - groups data
        data[2] - primary data
    """
    data0 = data0[0]
    data1 = data1[0]
    data2 = data2[0]

    # first decide what true edges should be
    # need to form graph, then pass through GNN
    # clusts = form_clusters(data0)
    clusts = form_clusters_new(data0)

    # remove track-like particles
    # types = get_cluster_label(data0, clusts)
    # selection = types > 1 # 0 or 1 are track-like
    # clusts = clusts[selection]

    # remove compton clusters
    # if no cluster fits this condition, return
    selection = filter_compton(clusts)  # non-compton looking clusters
    if not len(selection):
        total_loss = self.lossfn(edge_pred, edge_pred)
        return {'accuracy': 1., 'loss_seg': total_loss}

    clusts = clusts[selection]

    # process group data
    # data_grp = process_group_data(data1, data0)
    data_grp = data1

    # form primary/secondary bipartite graph
    primaries = assign_primaries(data2, clusts, data0)
    batch = get_cluster_batch(data0, clusts)
    edge_index = primary_bipartite_incidence(batch, primaries)
    group = get_cluster_label(data_grp, clusts)

    primaries_true = assign_primaries(data2, clusts, data1, use_labels=True)
    print("primaries (est): ", primaries)
    print("primaries (true): ", primaries_true)

    # determine true assignments
    edge_assn = edge_assignment(edge_index, batch, group, cuda=True)

    edge_assn = edge_assn.view(-1)
    edge_pred = edge_pred.view(-1)

    if self.balance:
        # weight edges so that 0/1 labels appear equally often
        ind0 = edge_assn == 0
        ind1 = edge_assn == 1
        # number in each class
        n0 = torch.sum(ind0).float()
        n1 = torch.sum(ind1).float()
        print("n0 = ", n0, " n1 = ", n1)
        # weights to balance classes
        w0 = n1 / (n0 + n1)
        w1 = n0 / (n0 + n1)
        print("w0 = ", w0, " w1 = ", w1)
        edge_assn[ind0] = w0 * edge_assn[ind0]
        edge_assn[ind1] = w1 * edge_assn[ind1]
        edge_pred = edge_pred.clone()
        edge_pred[ind0] = w0 * edge_pred[ind0]
        edge_pred[ind1] = w1 * edge_pred[ind1]

    total_loss = self.lossfn(edge_pred, edge_assn)

    # compute accuracy of assignment
    # need to multiply by batch size to be accurate
    total_acc = (np.max(batch) + 1) * torch.tensor(
        secondary_matching_vox_efficiency(edge_index, edge_assn, edge_pred,
                                          primaries, clusts, len(clusts)))

    return {'accuracy': total_acc, 'loss_seg': total_loss}
def find_shower_cone(dbscan, groups, em_primaries, energy_data, types,
                     length_factor=14.107334041,
                     slope_percentile=52.94032412,
                     slope_factor=5.86322059,
                     return_truth=False,
                     verbose=False):
    """
    dbscan: data parsed from "dbscan_label": ["parse_dbscan", "sparse3d_fivetypes"]
    groups: data parsed from "group_label": ["parse_cluster3d_clean", "cluster3d_mcst", "sparse3d_fivetypes"]
    em_primaries: data parsed from "em_primaries" : ["parse_em_primaries", "sparse3d_data", "particle_mcst"]
    energy_data: data parsed from "input_data": ["parse_sparse3d_scn", "sparse3d_data"]
    types: (???) Fivetypes label Tensor (N x 5)

    returns a list of length len(em_primaries) containing np arrays, each of which
    contains the indices corresponding to the voxels in the cone of the corresponding EM primary
    """
    clusts = form_clusters_new(dbscan)
    assigned_primaries = assign_primaries_unique(em_primaries, clusts, groups,
                                                 use_labels=True).astype(int)

    selected_voxels = []
    true_voxels = []
    cone_params_list = []
    for i in range(len(assigned_primaries)):
        if assigned_primaries[i] != -1:
            c = clusts[assigned_primaries[i]]

            if return_truth:
                group_ids = np.unique(groups[c][:, -1])
                type_id = -1
                for g in groups[c]:
                    for j in range(len(types)):
                        if np.array_equal(g[:3], types[j][:3]):
                            type_id = types[j][-1]
                            break
                    if type_id != -1:
                        break
                true_indices = np.where(
                    np.logical_and(np.isin(groups[:, -1], group_ids),
                                   types[:, -1] >= 2))[0]
                true_voxels.append(true_indices)

            p = em_primaries[i]
            em_point = p[:3]

            # find primary cluster axis
            primary_points = dbscan[c][:, :3]
            primary_center = np.average(primary_points.T, axis=1)
            primary_axis = primary_center - em_point

            # find furthest particle from cone axis (???)
            # COMMENT: Maybe not the furthest particle? This seems to select the slope by percentile.
            primary_length = np.linalg.norm(primary_axis)
            direction = primary_axis / primary_length
            axis_distances = np.linalg.norm(
                np.cross(primary_points - primary_center, primary_points - em_point),
                axis=1) / primary_length
            axis_projections = np.dot(primary_points - em_point, direction)
            primary_slope = np.percentile(axis_distances / axis_projections, slope_percentile)

            # define a cone around the primary axis
            cone_length = length_factor * primary_length
            cone_slope = slope_factor * primary_slope
            cone_vertex = em_point
            cone_axis = direction
            cone_params = (cone_length, cone_slope, cone_vertex, cone_axis)
            cone_params_list.append(cone_params)

            classified_indices = []
            # Should be able to vectorize this operation (see the sketch after this function).
            for j in range(len(dbscan)):
                point = types[j]
                if point[-1] < 2:  # ??? Why not != 2?
                    continue
                coord = point[:3]
                axis_dist = np.dot(coord - em_point, cone_axis)
                if 0 <= axis_dist and axis_dist <= cone_length:
                    cone_radius = axis_dist * cone_slope
                    point_radius = np.linalg.norm(
                        np.cross(coord - (em_point + cone_axis), coord - em_point))
                    if point_radius < cone_radius:
                        # point inside cone
                        classified_indices.append(j)
            classified_indices = np.array(classified_indices)
            selected_voxels.append(classified_indices)
        else:
            selected_voxels.append(np.array([]))

    if return_truth:
        return true_voxels, selected_voxels, cone_params_list
    else:
        return selected_voxels, cone_params_list
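# --------------------------------------------------------------------------
# Vectorized sketch of the per-voxel cone test above, as the in-code comment
# suggests. Illustrative helper, not the repo's API; assumes types is an
# (N x 5) array whose last column is the fivetypes label.
import numpy as np

def cone_members(types, em_point, cone_axis, cone_length, cone_slope):
    """Indices of shower-like voxels (label >= 2) inside the cone."""
    coords = types[:, :3]
    rel = coords - em_point
    axis_dist = rel @ cone_axis                      # projection onto the axis
    point_radius = np.linalg.norm(np.cross(rel, cone_axis), axis=1)
    inside = ((types[:, -1] >= 2)
              & (axis_dist >= 0) & (axis_dist <= cone_length)
              & (point_radius < axis_dist * cone_slope))
    return np.where(inside)[0]

# classified_indices = cone_members(types, em_point, cone_axis,
#                                   cone_length, cone_slope)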