def get_interactions(self, structureId, structure):
    """Collect interactions between every pair of polymer chains.

    Polymer atoms that pass the target-or-query criteria of ``self.filter``
    (and are not in a prohibited target group) are binned into one
    ``DistanceBox`` per chain. For each unordered pair of chains, the atoms
    in the two box intersections are compared pairwise; a pair within the
    distance cutoff is recorded for chain I when the chain-I atom matches
    the target criteria and the chain-J atom matches the query criteria,
    and for chain J in the mirrored case.

    Args:
        structureId: identifier that is concatenated with ``'.'`` and the
            chain name to form the first field of each row.
        structure: structure object handed to ``ColumnarStructure``; its
            ``entity_list`` is read to look up the ``'sequence'`` entry.

    Returns:
        list: ``Row`` objects with the fields (structureId.chain,
        partner chain, chain, sorted group numbers, sorted sequence
        indices, sequence). A row is emitted for a chain only when the
        number of distinct interacting sequence positions reaches
        ``self.filter.get_min_interactions()``.
    """
    from itertools import combinations

    rows = []

    flt = self.filter
    cutoffDistance = flt.get_distance_cutoff()
    cutoffDistanceSquared = cutoffDistance ** 2
    minInteractions = flt.get_min_interactions()

    arrays = ColumnarStructure(structure, True)
    chainNames = arrays.get_chain_names()
    groupNames = arrays.get_group_names()
    groupNumbers = arrays.get_group_numbers()
    atomNames = arrays.get_atom_names()
    entityIndices = arrays.get_entity_indices()
    elements = arrays.get_elements()
    polymer = arrays.is_polymer()
    sequenceMapIndices = arrays.get_sequence_positions()
    x = arrays.get_x_coords()
    y = arrays.get_y_coords()
    z = arrays.get_z_coords()

    def matches_target(a):
        # atom `a` satisfies the target group, atom-name and element criteria
        return (flt.is_target_group(groupNames[a])
                and flt.is_target_atom_name(atomNames[a])
                and flt.is_target_element(elements[a]))

    def matches_query(a):
        # atom `a` satisfies the query group, atom-name and element criteria
        return (flt.is_query_group(groupNames[a])
                and flt.is_query_atom_name(atomNames[a])
                and flt.is_query_element(elements[a]))

    # Create one distance box per chain for quick lookup of the polymer
    # atoms that can take part in an interaction as target or as query.
    boxes = {}
    for i in range(arrays.get_num_atoms()):
        group = groupNames[i]
        # The element test previously called the non-existent attribute
        # `self.filter_is_query_element_name`; it now uses the filter's
        # `is_query_element`, the same method used in the pair check below.
        if (polymer[i]
                and (flt.is_target_group(group)
                     or flt.is_query_group(group))
                and (flt.is_target_atom_name(atomNames[i])
                     or flt.is_query_atom_name(atomNames[i]))
                and (flt.is_target_element(elements[i])
                     or flt.is_query_element(elements[i]))
                and not flt.is_prohibited_target_group(group)):
            chain = chainNames[i]
            if chain not in boxes:
                boxes[chain] = DistanceBox(cutoffDistance)
            boxes[chain].add_point(np.array([x[i], y[i], z[i]]), i)

    # Loop over all unordered pairs of polymer chains (I before J, in the
    # insertion order of `boxes`).
    for (chainI, boxI), (chainJ, boxJ) in combinations(boxes.items(), 2):
        # NOTE(review): `getIntersection` is camelCase while `add_point` is
        # snake_case - confirm the method name against DistanceBox.
        intersectionI = boxI.getIntersection(boxJ)
        intersectionJ = boxJ.getIntersection(boxI)

        # maps of sequence index -> group number for each chain of the pair
        indicesI = {}
        indicesJ = {}

        # NOTE(review): these stay at -1 when nothing is recorded; with a
        # minimum of 0 interactions the lookup below would then read the
        # last entry of entity_list - confirm this is intended.
        entityIndexI = -1
        entityIndexJ = -1

        # Check every atom pair from the two intersections against the
        # distance cutoff and the interaction filter criteria.
        for n in intersectionI:
            for m in intersectionJ:
                dx = x[n] - x[m]
                dy = y[n] - y[m]
                dz = z[n] - z[m]
                dSq = dx * dx + dy * dy + dz * dz

                if dSq > cutoffDistanceSquared:
                    continue

                # chain I atom is the target, chain J atom the query
                if matches_target(n) and matches_query(m):
                    entityIndexI = entityIndices[n]
                    indicesI[sequenceMapIndices[n]] = groupNumbers[n]

                # mirrored roles: chain J atom is the target
                if matches_target(m) and matches_query(n):
                    entityIndexJ = entityIndices[m]
                    indicesJ[sequenceMapIndices[m]] = groupNumbers[m]

        # Add one row per chain of the pair that has enough interactions.
        # Group numbers and sequence indices are sorted independently.
        if len(indicesI) >= minInteractions:
            rows.append(Row(structureId + '.' + chainI, chainJ, chainI,
                            sorted(indicesI.values()),
                            sorted(int(s) for s in indicesI),
                            structure.entity_list[entityIndexI]['sequence']))

        if len(indicesJ) >= minInteractions:
            rows.append(Row(structureId + '.' + chainJ, chainI, chainJ,
                            sorted(indicesJ.values()),
                            sorted(int(s) for s in indicesJ),
                            structure.entity_list[entityIndexJ]['sequence']))

    return rows
def __call__(self, t):
    """Find polymer-polymer interactions in one structure.

    Args:
        t: indexable pair whose element 0 is the structure id and whose
            element 1 is the structure.

    Returns:
        An empty list when an early-exit condition applies (a single chain
        with only inter-chain interactions requested, or no atoms passing
        the query or target filter); otherwise a set of ``Row`` objects
        whose fields depend on ``self.level`` ('chain', 'group' or 'atom').
    """
    structure_id = t[0]
    structure = t[1]

    columns = ColumnarStructure(structure, True)

    # With only one chain there can be no inter-chain interactions.
    if structure.num_chains == 1 and self.inter and not self.intra:
        return []

    # Query filter: bail out as soon as one criterion matches nothing.
    group_names = columns.get_group_names()
    query_group_mask = self.filter.is_query_group_np(group_names)
    if not np.count_nonzero(query_group_mask):
        return []

    elements = columns.get_elements()
    query_element_mask = self.filter.is_query_element_np(elements)
    if not np.count_nonzero(query_element_mask):
        return []

    atom_names = columns.get_atom_names()
    query_atom_mask = self.filter.is_query_atom_name_np(atom_names)
    if not np.count_nonzero(query_atom_mask):
        return []

    # Restrict the query atoms to polymer atoms.
    polymer = columns.is_polymer()
    query_mask = polymer & query_group_mask & query_element_mask & query_atom_mask
    if not np.count_nonzero(query_mask):
        return []

    # Target filter, likewise restricted to polymer atoms.
    target_group_mask = self.filter.is_target_group_np(group_names)
    target_element_mask = self.filter.is_target_element_np(elements)
    target_atom_mask = self.filter.is_target_atom_name_np(atom_names)
    target_mask = polymer & target_group_mask & target_element_mask & target_atom_mask
    if not np.count_nonzero(target_mask):
        return []

    chain_names = columns.get_chain_names()
    group_numbers = columns.get_group_numbers()
    entity_indices = columns.get_entity_indices()
    sequence_positions = columns.get_sequence_positions()

    # Stack the coordinates along the last axis.
    # TODO add this to ColumnarStructure
    coords = np.stack((columns.get_x_coords(),
                       columns.get_y_coords(),
                       columns.get_z_coords()), axis=-1)

    # Per-atom data of the query atoms
    query_coords = coords[query_mask]
    query_group = group_names[query_mask]
    query_number = group_numbers[query_mask]
    query_atom = atom_names[query_mask]
    query_chain = chain_names[query_mask]

    # Per-atom data of the target atoms
    target_coords = coords[target_mask]
    target_group = group_names[target_mask]
    target_number = group_numbers[target_mask]
    target_atom = atom_names[target_mask]
    target_chain = chain_names[target_mask]
    target_entity = entity_indices[target_mask]
    target_seq_pos = sequence_positions[target_mask]

    # Distances between the target and query atom sets within the cutoff
    target_tree = cKDTree(target_coords)
    query_tree = cKDTree(query_coords)
    distance_cutoff = self.filter.get_distance_cutoff()
    pair_distances = target_tree.sparse_distance_matrix(
        query_tree, max_distance=distance_cutoff, output_type='dict')

    # Several atoms of one group can take part in interactions, so results
    # aggregated above the atom level would repeat; a set keeps only the
    # unique rows.
    rows = set()
    for pair, dist in pair_distances.items():
        ti = pair[0]  # index into the target atom arrays
        qi = pair[1]  # index into the query atom arrays

        if query_chain[qi] == target_chain[ti]:
            # Same chain: skip when intra-chain interactions are not
            # requested, or when both atoms share the same group number.
            if not self.intra or query_number[qi] == target_number[ti]:
                continue
        elif not self.inter:
            # Different chains, but inter-chain interactions not requested.
            continue

        # Skip self interactions (possible when the query and target
        # criteria overlap).
        if dist < 0.001:
            continue

        level = self.level
        if level not in ('chain', 'group', 'atom'):
            continue

        # Fields shared by every level: structureChainId, queryGroupId,
        # queryChainId, queryGroupNumber
        query_fields = (structure_id + "." + target_chain[ti],
                        query_group[qi],
                        query_chain[qi],
                        query_number[qi])

        if level == 'chain':
            row = Row(*query_fields,
                      target_chain[ti])  # targetChainId
        elif level == 'group':
            row = Row(*query_fields,
                      target_group[ti],  # targetGroupId
                      target_chain[ti],  # targetChainId
                      target_number[ti],  # targetGroupNumber
                      target_seq_pos[ti].item(),  # sequenceIndex
                      structure.entity_list[target_entity[ti]]['sequence'])  # sequence
        else:  # 'atom'
            row = Row(*query_fields,
                      query_atom[qi],  # queryAtomName
                      target_group[ti],  # targetGroupId
                      target_chain[ti],  # targetChainId
                      target_number[ti],  # targetGroupNumber
                      target_atom[ti],  # targetAtomName
                      dist,  # distance
                      target_seq_pos[ti].item(),  # sequenceIndex
                      structure.entity_list[target_entity[ti]]['sequence'])  # sequence
        rows.add(row)

    return rows
def get_interactions(self, structureId, structure):
    """Collect interactions between non-polymer groups and polymer chains.

    Polymer atoms that pass the target criteria of ``self.filter`` (and are
    not in a prohibited target group) are placed in a single
    ``DistanceBox``. Each non-polymer group accepted by the query-group
    filter is then probed atom by atom; polymer atoms within the distance
    cutoff are grouped by chain and summarised in one row per chain.

    Args:
        structureId: identifier that is concatenated with ``"."`` and the
            polymer chain name to form the first field of each row.
        structure: structure object handed to ``ColumnarStructure``; its
            ``entity_list`` is read to look up the ``'sequence'`` entry.

    Returns:
        list: ``Row`` objects with the fields (structureId.chain, query
        group name, query group number, query chain name, polymer chain
        name, sorted interacting group numbers, sorted interacting sequence
        indices, sequence, number of polymer chains with atoms near the
        query group).
    """
    rows = []

    flt = self.filter
    cutoffDistance = flt.get_distance_cutoff()
    cutoffDistanceSquared = cutoffDistance ** 2

    arrays = ColumnarStructure(structure, True)
    chainNames = arrays.get_chain_names()
    groupNames = arrays.get_group_names()
    groupNumbers = arrays.get_group_numbers()
    atomNames = arrays.get_atom_names()
    entityIndices = arrays.get_entity_indices()
    elements = arrays.get_elements()
    polymer = arrays.is_polymer()
    sequenceMapIndices = arrays.get_sequence_positions()
    x = arrays.get_x_coords()
    y = arrays.get_y_coords()
    z = arrays.get_z_coords()

    # Create a distance box for quick lookup of the polymer atoms that
    # satisfy the target criteria.
    box = DistanceBox(cutoffDistance)
    for i in range(arrays.get_num_atoms()):
        if polymer[i] \
                and flt.is_target_group(groupNames[i]) \
                and flt.is_target_atom_name(atomNames[i]) \
                and flt.is_target_element(elements[i]) \
                and not flt.is_prohibited_target_group(groupNames[i]):
            box.add_point(np.array([x[i], y[i], z[i]]), i)

    groupToAtomIndices = arrays.get_group_to_atom_indices()

    for g in range(arrays.get_num_groups()):
        # position of the first atom and of the last atom + 1 in the group
        start = groupToAtomIndices[g]
        end = groupToAtomIndices[g + 1]

        # skip polymer groups
        if polymer[start]:
            continue

        # skip groups rejected by the query filter (some groups may be
        # excluded, e.g. water)
        if not flt.is_query_group(groupNames[start]):
            continue

        # (a leftover debug print of the group name was removed here)

        # Collect the polymer atoms within the cutoff distance of any
        # accepted atom of this group. An atom index may appear repeatedly.
        neighbors = []
        for a in range(start, end):
            if not (flt.is_query_atom_name(atomNames[a])
                    and flt.is_query_element(elements[a])):
                continue

            p = np.array([x[a], y[a], z[a]])

            # The box returns candidate atoms; apply the exact distance
            # check to each of them.
            for j in box.get_neighbors(p):
                dx = x[j] - x[a]
                dy = y[j] - y[a]
                dz = z[j] - z[a]
                dSq = dx * dx + dy * dy + dz * dz
                if dSq <= cutoffDistanceSquared:
                    neighbors.append(j)

        if not neighbors:
            continue

        # chain name -> list of (sequence position, group number, entity
        # index). The chain key is registered even when its list stays
        # empty, because the chain count below relies on it.
        interactionsByChain = {}
        for neighbor in neighbors:
            chainHits = interactionsByChain.setdefault(chainNames[neighbor], [])

            # keep track of which group is interacting
            seqPos = sequenceMapIndices[neighbor]

            # Non-polymer groups have a negative index and are excluded
            # here. Position 0 is not negative, so it is kept (the former
            # `seqPos > 0` test dropped it).
            if seqPos >= 0:
                chainHits.append((seqPos,
                                  groupNumbers[neighbor],
                                  entityIndices[neighbor]))

        for chain, hits in interactionsByChain.items():
            if not hits:
                continue

            sequenceIndices = {int(hit[0]) for hit in hits}
            # NOTE(review): int() assumes group numbers are purely numeric
            # - confirm for group numbers carrying insertion codes.
            interactingGroupNumbers = {int(hit[1]) for hit in hits}
            # the sequence is taken from the entity of the first hit
            sequence = structure.entity_list[hits[0][2]]['sequence']

            rows.append(Row(structureId + "." + chain, groupNames[start],
                            groupNumbers[start], chainNames[start],
                            chain, sorted(interactingGroupNumbers),
                            sorted(sequenceIndices), sequence,
                            len(interactionsByChain)))

    return rows
def __call__(self, t):
    """Find ligand-polymer interactions in one structure.

    Args:
        t: indexable pair whose element 0 is the structure id and whose
            element 1 is the structure.

    Returns:
        An empty list when no atoms pass the query (ligand) or target
        (polymer) filter; otherwise a set of ``Row`` objects whose fields
        depend on ``self.level`` ('chain', 'group' or 'atom').
    """
    structure_id = t[0]
    structure = t[1]

    columns = ColumnarStructure(structure, True)

    # Query (ligand) filter: bail out as soon as one criterion matches
    # nothing.
    group_names = columns.get_group_names()
    query_group_mask = self.filter.is_query_group_np(group_names)
    if not np.count_nonzero(query_group_mask):
        return []

    elements = columns.get_elements()
    query_element_mask = self.filter.is_query_element_np(elements)
    if not np.count_nonzero(query_element_mask):
        return []

    atom_names = columns.get_atom_names()
    query_atom_mask = self.filter.is_query_atom_name_np(atom_names)
    if not np.count_nonzero(query_atom_mask):
        return []

    # TODO(review): prohibited groups are not filtered here - decide
    # whether they should be.

    # Ligand atoms are the non-polymer atoms that pass the query filter.
    polymer = columns.is_polymer()
    ligand_mask = ~polymer & query_group_mask & query_element_mask & query_atom_mask
    if not np.count_nonzero(ligand_mask):
        return []

    # Target filter, restricted to polymer atoms.
    target_group_mask = self.filter.is_target_group_np(group_names)
    target_element_mask = self.filter.is_target_element_np(elements)
    target_atom_mask = self.filter.is_target_atom_name_np(atom_names)
    polymer_mask = polymer & target_group_mask & target_element_mask & target_atom_mask
    if not np.count_nonzero(polymer_mask):
        return []

    chain_names = columns.get_chain_names()
    group_numbers = columns.get_group_numbers()
    entity_indices = columns.get_entity_indices()
    sequence_positions = columns.get_sequence_positions()

    # Stack the coordinates along the last axis.
    # TODO add this to ColumnarStructure
    coords = np.stack((columns.get_x_coords(),
                       columns.get_y_coords(),
                       columns.get_z_coords()), axis=-1)

    # Per-atom data of the ligand atoms
    ligand_coords = coords[ligand_mask]
    ligand_group = group_names[ligand_mask]
    ligand_number = group_numbers[ligand_mask]
    ligand_atom = atom_names[ligand_mask]
    ligand_chain = chain_names[ligand_mask]

    # Per-atom data of the polymer atoms
    polymer_coords = coords[polymer_mask]
    polymer_group = group_names[polymer_mask]
    polymer_number = group_numbers[polymer_mask]
    polymer_atom = atom_names[polymer_mask]
    polymer_chain = chain_names[polymer_mask]
    polymer_entity = entity_indices[polymer_mask]
    polymer_seq_pos = sequence_positions[polymer_mask]

    # Distances between polymer and ligand atoms within the cutoff
    poly_tree = cKDTree(polymer_coords)
    lig_tree = cKDTree(ligand_coords)
    distance_cutoff = self.filter.get_distance_cutoff()
    pair_distances = poly_tree.sparse_distance_matrix(
        lig_tree, max_distance=distance_cutoff, output_type='dict')

    # Several atoms of one group can take part in interactions, so results
    # aggregated above the atom level would repeat; a set keeps only the
    # unique rows.
    rows = set()
    for pair, dist in pair_distances.items():
        # The matrix is built from poly_tree against lig_tree, so the first
        # index addresses the polymer arrays and the second the ligand
        # arrays.
        pi = pair[0]  # polymer atom index
        li = pair[1]  # ligand atom index

        level = self.level
        if level not in ('chain', 'group', 'atom'):
            continue

        # Fields shared by every level: structureChainId, queryLigandId,
        # queryLigandChainId, queryLigandNumber
        ligand_fields = (structure_id + "." + polymer_chain[pi],
                         ligand_group[li],
                         ligand_chain[li],
                         ligand_number[li])

        if level == 'chain':
            row = Row(*ligand_fields,
                      polymer_chain[pi])  # targetChainId
        elif level == 'group':
            row = Row(*ligand_fields,
                      polymer_group[pi],  # targetGroupId
                      polymer_chain[pi],  # targetChainId
                      polymer_number[pi],  # targetGroupNumber
                      polymer_seq_pos[pi].item(),  # sequenceIndex
                      structure.entity_list[polymer_entity[pi]]['sequence'])  # sequence
        else:  # 'atom'
            row = Row(*ligand_fields,
                      ligand_atom[li],  # queryAtomName
                      polymer_group[pi],  # targetGroupId
                      polymer_chain[pi],  # targetChainId
                      polymer_number[pi],  # targetGroupNumber
                      polymer_atom[pi],  # targetAtomName
                      dist,  # distance
                      polymer_seq_pos[pi].item(),  # sequenceIndex
                      structure.entity_list[polymer_entity[pi]]['sequence'])  # sequence
        rows.add(row)

    return rows