def calculate_ss(msaName, chain):
    """Annotate the secondary structure of one or more chains.

    The structure is obtained from ``pdb2biotite(msaName)``, hetero atoms
    are filtered out, and ``struc.annotate_sse`` is run for each requested
    chain.

    Parameters
    ----------
    msaName
        Identifier handed to ``pdb2biotite``, e.g. ``"1FM0_E_1FM0_D"``.
    chain
        Either a single chain ID, or a sequence of chain IDs (a string such
        as ``"ED"`` is iterated character by character).  Anything with
        ``len(chain) > 1`` is treated as several chains.

    Returns
    -------
    The array returned by ``struc.annotate_sse`` for a single chain, or the
    per-chain arrays concatenated in the order given by ``chain``.
    """
    array = pdb2biotite(msaName)
    # Drop hetero atoms before estimating the secondary structure.
    array = array[~array.hetero]

    if len(chain) > 1:
        # Concatenate *all* chains; the previous implementation only joined
        # the first two and silently discarded any further ones.
        return np.concatenate(
            [struc.annotate_sse(array, chain_id=ch) for ch in chain]
        )
    return struc.annotate_sse(array, chain_id=chain)
def test_sse():
    """Compare the annotation of chain A of ``3o5r.mmtf`` with a reference."""
    expected = (
        "caaaaaacccccccccccccbbbbbccccccbbbbccccccccccccccc"
        "ccccccccccccbbbbbbcccccccaaaaaaaaaccccccbbbbbccccc"
        "ccccccccccccbbbbbbbccccccccc"
    )

    structure_path = join(data_dir("structure"), "3o5r.mmtf")
    atoms = strucio.load_structure(structure_path)
    annotation = struc.annotate_sse(atoms, "A")

    # Collapse the per-residue codes into one string for comparison.
    observed = "".join(annotation.tolist())
    assert observed == expected
def psea_sec(file):
    """Return the secondary structure annotation of chain ``"A"``.

    Parameters
    ----------
    file
        Object passed unchanged to ``mmtf.get_structure``; only model 1 is
        read.

    Returns
    -------
    The result of ``struc.annotate_sse`` for chain ``"A"`` of the loaded
    structure.
    """
    array = mmtf.get_structure(file, model=1)
    # The annotation is computed on the full model; the amino-acid / chain
    # filtered copies built previously were never used and have been dropped.
    return struc.annotate_sse(array, chain_id="A")
def _atom_feature_row(array, idx, sseMaskDict):
    """Build the numeric feature list for the atom at position ``idx``."""
    # Secondary structure code of the atom's residue; residues without an
    # entry (e.g. no CA atom in that chain) fall back to 'Null'.
    sse_code = sseMaskDict[array.chain_id[idx]].get(array.res_id[idx], 'Null')
    return list(array.coord[idx]) + [
        atomsDict.get(array.atom_name[idx], atomsDict['Null']),
        elementsDict.get(array.element[idx], elementsDict['Null']),
        array.res_id[idx],
        residualesDict.get(array.res_name[idx], residualesDict['Null']),
        float(array.hetero[idx]),
        array.occupancy[idx],
        array.b_factor[idx],
        ssesTypeDict.get(sse_code, ssesTypeDict['Null']),
    ]


def pdb2Gdata(dirName, fileName, saveDir=False):
    """Convert a structure file into a graph ``Data`` object.

    Every atom of model 1 becomes a node; two atoms are connected by one
    edge ``(i, j)`` with ``i < j`` when the cell-list adjacency matrix for
    ``cfg.threshold`` marks them as adjacent.

    Node features, in order: coordinates, atom-name code, element code,
    residue ID, residue-name code, hetero flag, occupancy, B-factor and
    secondary-structure code.  Unknown names map to the ``'Null'`` entry of
    the respective lookup dictionary.

    Parameters
    ----------
    dirName
        Directory containing the structure file.
    fileName
        Name of the structure file; also used as the output file name.
    saveDir
        If truthy, directory into which the resulting object is written
        with ``torch.save``.  Nothing is saved by default.

    Returns
    -------
    ``Data`` with ``x`` (float node features) and ``edge_index`` (long,
    two rows).
    """
    array = strucio.load_structure(
        os.path.join(dirName, fileName),
        extra_fields=['b_factor', 'occupancy'],
        model=1)

    # Unique chain IDs in order of first appearance.
    chain_ids = list(dict.fromkeys(array.chain_id))

    # Per chain: residue ID (taken from the CA atoms) -> SSE code.
    sseMaskDict = {}
    for chain in chain_ids:
        sse = struc.annotate_sse(array, chain_id=chain)
        ca_res_ids = array[
            (array.chain_id == chain) & (array.atom_name == 'CA')
        ].res_id
        missing = ca_res_ids.shape[0] - sse.shape[0]
        if missing > 0:
            # Fewer annotations than CA atoms: pad so the trailing
            # residues are explicitly marked as 'Null'.
            sse = np.append(sse, ['Null'] * missing)
        sseMaskDict[chain] = dict(zip(ca_res_ids, sse))

    cell_list = struc.CellList(array, cell_size=cfg.threshold)
    adj_matrix = cell_list.create_adjacency_matrix(cfg.threshold)

    # Strict upper triangle -> each adjacent pair once, with i < j.
    # np.nonzero reports indices in row-major order, i.e. the same order a
    # nested ``for i / for j > i`` loop would produce.
    src, dst = np.nonzero(np.triu(adj_matrix, k=1))
    edge_indexT = torch.tensor(np.vstack((src, dst)), dtype=torch.long)

    nodeFeatures = [
        _atom_feature_row(array, idx, sseMaskDict)
        for idx in range(array.shape[0])
    ]
    nodeFeaturesT = torch.tensor(nodeFeatures, dtype=torch.float)

    data = Data(x=nodeFeaturesT, edge_index=edge_indexT)
    if saveDir:
        torch.save(data, os.path.join(saveDir, fileName))
    return data
feature_plotters=[HelixPlotter(), SheetPlotter()]) fig.tight_layout() # Visualize secondary structure array # Since the residues may not start at 1, # provide the actual first residue ID visualize_secondary_structure(sse, tk_mono.res_id[0]) ######################################################################## # Almost the same result can be achieved, when we calculate the # secondary structure ourselves using the DSSP software, # as the content in ``'secStructList'`` is also calculated by the RCSB. sse = dssp.DsspApp.annotate_sse(tk_mono) sse = np.array([dssp_to_abc[e] for e in sse], dtype="U1") visualize_secondary_structure(sse, tk_mono.res_id[0]) # sphinx_gallery_thumbnail_number = 4 ######################################################################## # The one and only difference is that the second helix is slightly # shorter. # This is probably caused by different versions of DSSP. # # Last but not least we calculate the secondary structure using # *Biotite*'s built-in method, based on the P-SEA algorithm. sse = struc.annotate_sse(array, chain_id="A") visualize_secondary_structure(sse, tk_mono.res_id[0]) plt.show()