def get_median_rsmd(pdb_id):
    """Return the per-window median CA RMSD of an ensemble against its first model.

    The structure is read from ``data/<pdb_id>.pdb``. The CA atoms of the
    first model are the reference; for every other model, each window of
    ``window_size`` consecutive CA atoms is superimposed onto the matching
    reference window and the RMSD of that fragment is recorded.

    :param pdb_id: identifier used both as structure id and as file stem
    :return: array with one median RMSD per window start, the median being
        taken across all non-reference models
    """
    logging.info("Computing median RSMD ")
    ensemble = PDBParser(QUIET=True).get_structure(
        pdb_id, "data/{}.pdb".format(pdb_id))
    super_imposer = Superimposer()
    window_size = 5

    ref_model = None
    rmsd_per_model = []  # one row of fragment RMSDs per non-reference model
    for j, model_j in enumerate(ensemble):
        ca_atoms = [
            atom for atom in model_j.get_atoms() if atom.get_name() == "CA"
        ]
        if j == 0:
            # CA atoms of the first model are the reference
            ref_model = ca_atoms
            continue

        model_rmsd = []
        # NOTE: the range stops at len - window_size, so the last possible
        # window is not evaluated; kept as-is to preserve the output length.
        for start in range(len(ref_model) - window_size):
            end = start + window_size
            ref_fragment = ref_model[start:end]
            alt_fragment = ca_atoms[start:end]

            # Rotation/translation that best fits alt_fragment onto ref_fragment
            super_imposer.set_atoms(ref_fragment, alt_fragment)
            rotation, translation = super_imposer.rotran

            # Apply the transformation to a copy of the coordinates only;
            # the atoms themselves are left untouched.
            alt_coord = np.array([atom.get_coord() for atom in alt_fragment])
            alt_coord = np.dot(rotation.T, alt_coord.T).T + translation

            # https://en.wikipedia.org/wiki/Root-mean-square_deviation_of_atomic_positions
            ref_coord = np.array([atom.get_coord() for atom in ref_fragment])
            dist = ref_coord - alt_coord
            model_rmsd.append(np.sqrt(np.sum(dist * dist) / window_size))
        rmsd_per_model.append(model_rmsd)

    return np.median(np.array(rmsd_per_model), axis=0)
def test_compare():
    """
    Check that the diSTruct superimposer agrees with the Biopython one.

    A Distructure is built from the sequences and contacts of 1ptq and run;
    the resulting atoms are superimposed onto the reference atoms with both
    implementations. The Biopython RMSD is taken as the smaller of the value
    for the result and for its point-mirrored copy.
    """
    from Bio import SeqIO
    from Bio.PDB import Superimposer as BPSuperimposer
    from Bio.PDB import PDBParser
    from distruct.tools.pdb import get_contacts

    code = '1ptq'
    fileName = testFilePath + code + '.pdb'

    refStructure = PDBParser().get_structure(code, fileName)
    contacts = get_contacts(refStructure[0], cutOff=5., minSeqDist=0)

    with open(fileName, 'r') as handle:
        sequences = [record.seq for record in SeqIO.parse(handle, "pdb-seqres")]

    # Residue ids per chain, hetero residues and waters excluded
    residueIds = []
    for chain in refStructure[0]:
        residueIds.append(
            [res.get_id() for res in chain if res.get_id()[0] == ' '])

    ds = Distructure('test', sequences, residueIds)
    ds.generate_primary_contacts()
    ds.set_tertiary_contacts(contacts)
    ds.run()

    refStructure = PDBParser().get_structure(code, fileName)
    tempStructure = ds.copy()

    refAtoms = list(cull_atoms(refStructure.get_atoms(), ds))
    resAtoms = list(cull_atoms(tempStructure.get_atoms(), refStructure))
    assert len(refAtoms) > 3
    assert len(refAtoms) == len(resAtoms)

    dssup = Superimposer()
    dssup.set_atoms(refAtoms, resAtoms)
    dsRMSD = dssup.rms

    bpsup = BPSuperimposer()
    bpsup.set_atoms(refAtoms, resAtoms)
    bpRMSD = bpsup.rms

    # Also try the point-mirrored result and keep the better Biopython fit
    for atom in resAtoms:
        atom.set_coord(-1 * atom.get_coord())
    bpsup.set_atoms(refAtoms, resAtoms)
    mirroredRMSD = bpsup.rms
    if mirroredRMSD < bpRMSD:
        bpRMSD = mirroredRMSD

    assert dsRMSD == approx(bpRMSD)
    return
def getTransformMatrix(self, atomStructFn, startId=-1, endId=-1):
    """Find the matrix that superimposes two atom structures.

    The returned matrix moves atomStructFn onto self. Only CA atoms of
    residues present in both structures, with residue number inside
    [startId, endId], are used. Chains are paired positionally, so both
    structures are expected to list their chains in the same order.

    :param atomStructFn: file name of the atom structure to be moved
    :param startId: lowest residue number to use
    :param endId: highest residue number to use (-1 means no upper limit)
    :return: tuple (4x4 homogeneous matrix, RMS of the superposition)
    """
    if endId == -1:
        endId = 10000000

    # Load the second atom structure
    otherHandler = AtomicStructHandler()
    otherHandler.read(atomStructFn)

    # Only the first model of each structure is aligned
    refModel = self.getStructure()[0]
    sampleModel = otherHandler.getStructure()[0]

    refAtoms = []
    sampleAtoms = []
    for refChain, sampleChain in zip(refModel, sampleModel):
        refNumbers = set(res.get_id()[1] for res in refChain)
        sampleNumbers = set(res.get_id()[1] for res in sampleChain)
        # Sets are unordered, so sort the shared residue numbers
        for resNumber in sorted(refNumbers & sampleNumbers):
            if startId <= resNumber <= endId:
                refAtoms.append(refChain[resNumber]['CA'])
                sampleAtoms.append(sampleChain[resNumber]['CA'])

    superImposer = Superimposer()
    superImposer.set_atoms(refAtoms, sampleAtoms)
    rot, trans = superImposer.rotran

    # Build the homogeneous matrix; the rotation is transposed because
    # scipion and biopython use different conventions.
    mat = numpy.identity(4)
    mat[:3, :3] = numpy.transpose(rot)
    mat[0, 3] = trans[0]
    mat[1, 3] = trans[1]
    mat[2, 3] = trans[2]
    return mat, superImposer.rms
def superimpose_pdb_by_chain(chain_fix, chain_mov):
    '''
    Fit chain_mov onto chain_fix and move the parent of chain_mov accordingly.

    The transformation is computed from the atoms returned by
    find_common_atoms and applied in place to chain_mov.parent.

    :param chain_fix: Chain object used as the fixed reference
    :param chain_mov: Chain object whose parent is transformed
    '''
    fixed_atoms, moving_atoms = find_common_atoms(chain_fix, chain_mov)
    superimposer = Superimposer()
    superimposer.set_atoms(fixed_atoms, moving_atoms)
    rotation, translation = superimposer.rotran
    chain_mov.parent.transform(rot=rotation, tran=translation)
def superimpose_structures (core_model, test_model, RMSD_threshold):
    """
    Superimpose every core chain with every compatible test chain.

    Two chains are compatible when they have the same molecule type and the
    same number of backbone atoms. Superimpositions whose RMSD exceeds the
    threshold are discarded.

    Keyword arguments:
    core_model -- first model of the core structure object.
    test_model -- first model of the test structure object.
    RMSD_threshold -- root-mean-square deviation threshold for the superimposition.

    Returns a tuple (superimpositions, best_RMSD, superimposed_chains).
    superimpositions is a list of ((core_id, test_id), Superimposer) pairs
    sorted by RMSD when at least one was accepted, otherwise an empty dict;
    best_RMSD is the lowest accepted RMSD (0 if none); superimposed_chains
    tells whether anything was accepted.
    """
    def backbone_of(chain):
        # CA atoms for proteins, C4 atoms for everything else
        kind = get_molecule_type(chain)
        if kind == "Protein":
            return get_backbone_atoms_protein(chain), kind
        return get_backbone_atoms_nucleicacids(chain), kind

    best_RMSD = 0
    lowest_seen = None
    superimposed_chains = False
    superimpositions = {}

    for core_chain in core_model.get_chains():
        core_atoms, core_kind = backbone_of(core_chain)

        for test_chain in test_model.get_chains():
            test_atoms, test_kind = backbone_of(test_chain)

            if core_kind != test_kind:
                continue  # different molecule types (TODO: add verbose output)
            if len(core_atoms) != len(test_atoms):
                continue  # different atom counts (TODO: add verbose output)

            fit = Superimposer()
            fit.set_atoms(core_atoms, test_atoms)
            current = fit.rms
            if current > RMSD_threshold:
                continue

            if lowest_seen is None or current < lowest_seen:
                lowest_seen = current
                best_RMSD = current
            superimpositions[(core_chain.get_id(), test_chain.get_id())] = fit
            superimposed_chains = True

    if superimposed_chains is True:
        # Turn the dictionary into a list of items ordered by RMSD
        superimpositions = sorted(superimpositions.items(),
                                  key=lambda item: item[1].rms)
    return (superimpositions, best_RMSD, superimposed_chains)
def get_best_RMSD(core, test):
    """
    Find the superimposition of test onto core with the lowest RMSD.

    Every chain of the first test model is compared with every chain of the
    first core model. When arguments.args.core is set and both chains are
    nucleotides, the test chain is slid along the core chain and every
    window is tried; otherwise the chains are only superimposed when they
    have the same length and molecule type.

    Returns a tuple (test model, Superimposer to apply, test.id), or None
    when no superimposition could be made.
    """
    best_RMSD = None
    best_model = None

    for test_chain in test[0].get_chains():
        test_atoms = get_atoms_list(test_chain)
        n_test = len(test_atoms)

        for core_chain in core[0].get_chains():
            core_atoms = get_atoms_list(core_chain)

            if arguments.args.core and molecule_type(
                    core_chain) == molecule_type(test_chain) == "Nucleotide":
                # Every window of the core chain as long as the test chain
                last_start = len(core_atoms) - n_test
                core_windows = [
                    core_atoms[start:start + n_test]
                    for start in range(last_start + 1)
                ]
            elif len(core_atoms) == n_test and molecule_type(
                    test_chain) == molecule_type(core_chain):
                core_windows = [core_atoms]
            else:
                core_windows = []

            for window in core_windows:
                candidate = Superimposer()
                candidate.set_atoms(window, test_atoms)
                if best_RMSD is None or candidate.rms < best_RMSD:
                    best_RMSD = candidate.rms
                    best_model = test[0]
                    superimposer_object_to_apply = candidate

    if best_model:
        return (best_model, superimposer_object_to_apply, test.id)
    return None
def compare_structure(reference, alternate):
    """Superimpose chain A of `alternate` onto chain A of `reference`.

    Only CA atoms of residues whose name is a key of resdict are used.
    The first model of the alternate structure is moved in place.

    :param reference: path of the reference PDB file
    :param alternate: path of the PDB file to be fitted
    :return: RMSD of the superposition
    """
    parser = PDBParser()
    ref_struct = parser.get_structure('Reference', reference)
    alt_struct = parser.get_structure("Alternate", alternate)

    ref_chain = ref_struct[0]['A']
    alt_model = alt_struct[0]
    alt_chain = alt_model['A']

    ref_atoms = [
        res['CA'] for res in ref_chain if res.get_resname() in resdict
    ]
    alt_atoms = [
        res['CA'] for res in alt_chain if res.get_resname() in resdict
    ]

    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, alt_atoms)
    super_imposer.apply(alt_model.get_atoms())
    return super_imposer.rms
def align(mobile: Chain, reference: Chain) -> Tuple[Optional[Chain], int, Optional[float]]:
    """Superimpose `mobile` onto `reference` using their aligned CA atoms.

    Residue pairs come from _get_residue_alignments; pairs where either
    residue has no CA atom are ignored. The transformation is applied in
    place to every atom of the mobile chain, disordered ones included.

    :return: (mobile, number of aligned residues, RMSD), or (None, 0, None)
        when there are no aligned residues or no usable CA pairs
    """
    align_mobile, align_ref = _get_residue_alignments(mobile, reference)
    num_align_residues = len(align_mobile)
    if not num_align_residues:
        return None, 0, None

    ca_pairs = []
    for res_mobile, res_ref in zip(align_mobile, align_ref):
        ca_mobile = res_mobile.child_dict.get('CA')
        ca_ref = res_ref.child_dict.get('CA')
        if ca_mobile is not None and ca_ref is not None:
            ca_pairs.append((ca_mobile, ca_ref))
    if not ca_pairs:
        return None, 0, None

    atoms_mobile, atoms_ref = zip(*ca_pairs)
    superimposer = Superimposer()
    superimposer.set_atoms(atoms_ref, atoms_mobile)

    # NB: get_unpacked_list also yields the atoms of disordered residues
    all_atoms_disordered: List[Atom] = [
        atom
        for residue in mobile.as_biopython().get_residues()
        for atom in residue.get_unpacked_list()
    ]
    superimposer.apply(all_atoms_disordered)

    return mobile, num_align_residues, superimposer.rms
def align_structures_biopython(struct_path_ref, struct_path_query, new_query_path):
    """Fit a query structure onto a reference and save the moved query.

    Both files must contain exactly one model with exactly one chain. The
    sequences are aligned pairwise, the CA atoms of aligned residue pairs
    drive the superposition, and the transformed query is written to
    new_query_path.

    :param struct_path_ref: path of the reference PDB file
    :param struct_path_query: path of the query PDB file
    :param new_query_path: output path for the superimposed query
    :raises ValueError: if a structure has more than one model or chain
    """
    def get_sequence(pdb):
        # One-letter sequence of a single-model, single-chain structure
        if len(pdb) > 1:
            raise ValueError(
                "Can not handle structures with more than one MODEL!\nThis structure has {0} MODELS!"
                .format(len(pdb)))
        if len(pdb[0]) > 1:
            raise ValueError(
                "Can not handle structures with more than one CHAIN!\nThis structure has {0} CHAINS!"
                .format(len(pdb[0])))
        letters = []
        for model in pdb:
            for chain in model:
                for res in chain:
                    if res.resname in to_one_letter_code:
                        letters.append(
                            "{0}".format(to_one_letter_code[res.resname]))
        return "".join(letters)

    def get_alignment(pdb_ref, pdb_query):
        # Pairwise alignment of both sequences as a two-record alignment
        aligned = get_pairwise_alignment(get_sequence(pdb_ref),
                                         get_sequence(pdb_query))
        records = [
            SeqRecord(Seq(aligned["ref_seq"], generic_protein), id="ref"),
            SeqRecord(Seq(aligned["query_seq"], generic_protein), id="query"),
        ]
        return MultipleSeqAlignment(records)

    parser = PDBParser()
    pdb_ref = parser.get_structure("ref", struct_path_ref)
    pdb_query = parser.get_structure("query", struct_path_query)

    aln = get_alignment(pdb_ref, pdb_query)
    residue_map = StructureAlignment(aln, pdb_ref, pdb_query)

    # CA atoms of every residue pair aligned without a gap
    coords_ref = []
    coords_query = []
    for res_ref, res_query in residue_map.get_iterator():
        if res_ref is None or res_query is None:
            continue
        coords_ref.append(res_ref['CA'])
        coords_query.append(res_query['CA'])
    coords_ref = np.array(coords_ref)
    coords_query = np.array(coords_query)

    super_imposer = Superimposer()
    super_imposer.set_atoms(coords_ref, coords_query)
    super_imposer.apply(pdb_query.get_atoms())

    io = PDBIO()
    io.set_structure(pdb_query)
    io.save(new_query_path)
def get_all_RMSD(core, test):
    """
    Return up to the 100 best superpositions of test onto core.

    Every chain of the first test model is compared with every chain of the
    first core model. When arguments.args.core is set and both chains are
    nucleotides, the test chain is slid along the core chain and every
    window is superimposed; otherwise the chains are only superimposed when
    they have the same length and molecule type.

    Returns a list of tuples (test model, Superimposer, test.id) ordered by
    increasing RMSD and truncated to 100 entries.
    """
    All_Models = []
    for test_chain in test[0].get_chains():
        test_atoms = get_atoms_list(test_chain)
        n_test = len(test_atoms)

        for core_chain in core[0].get_chains():
            core_atoms = get_atoms_list(core_chain)

            if arguments.args.core and molecule_type(
                    core_chain) == molecule_type(test_chain) == "Nucleotide":
                # Slide a window of the test length along the core chain
                for start in range(len(core_atoms) - n_test + 1):
                    superimpose = Superimposer()
                    superimpose.set_atoms(core_atoms[start:start + n_test],
                                          test_atoms)
                    All_Models.append((test[0], superimpose, test.id))
            elif len(core_atoms) == n_test and molecule_type(
                    test_chain) == molecule_type(core_chain):
                # Superimpose chains with same length
                superimpose = Superimposer()
                superimpose.set_atoms(core_atoms, test_atoms)
                All_Models.append((test[0], superimpose, test.id))

    # Rank by RMSD. The previous key was test.id, which is the same for
    # every entry, so the "best 100" were simply the first 100 found.
    All_Models.sort(key=lambda entry: entry[1].rms)
    return All_Models[0:100]
def compare_domains(s1_id: str, s1_filename: str, ds1_info: tuple, s2_id: str, s2_filename: str, ds2_info: tuple):
    """
    Print the gyration radius of two domains and the RMSD between them.

    Each domain is cut out of its PDB structure with
    get_structure_slice_by_residues; the RMSD comes from superimposing the
    atoms returned by get_synchronized_atoms.
    PDB structures should be equal primary protein structures: equal size, equal sequences

    :param s1_id: structure 1 id
    :param s1_filename: filename to PDB structure 1
    :param ds1_info: domain info in structure 1 (tuple: chain_number, start_residue, finish_residue)
    :param s2_id: structure 2 id
    :param s2_filename: filename to PDB structure 2
    :param ds2_info: domain info in structure 2 (tuple: chain_number, start_residue, finish_residue)
    """
    pdb_parser = PDBParser()

    # Build both domains and collect their residues
    first_structure = pdb_parser.get_structure(s1_id, s1_filename)
    first_domain = get_structure_slice_by_residues(
        first_structure, "real_structure", *ds1_info)
    first_residues = list(first_domain.get_residues())

    second_structure = pdb_parser.get_structure(s2_id, s2_filename)
    second_domain = get_structure_slice_by_residues(
        second_structure, "domain1", *ds2_info)
    second_residues = list(second_domain.get_residues())

    # Calculation parameters
    rg1 = get_gyration_radius(first_domain)
    rg2 = get_gyration_radius(second_domain)

    fixed_atoms, moving_atoms = get_synchronized_atoms(first_residues,
                                                       second_residues)
    sup = Superimposer()
    sup.set_atoms(fixed_atoms, moving_atoms)
    domain_rms = sup.rms

    # Report
    print(f"domain 1 gyration radius: {rg1}")
    print(f"domain 2 gyration radius: {rg2}")
    print(f"domain RMSD: {domain_rms}")
def rmsd(self, mobile, mob_ref=None):
    """Calculates Root Mean Square Deviation between 2 interfaces.

    Unless mob_ref is given, the residue composition of both interfaces
    (sorted one-letter codes) is printed and must be identical. The CA
    atoms of both interfaces are then superimposed.

    :param mobile: interface compared against self
    :param mob_ref: when truthy, the composition check is skipped
    :return: RMSD of the CA superposition
    :raises ValueError: if the residue compositions differ
    """
    if not mob_ref:
        ref_seq = [to_one_letter_code[r.resname] for r in self.child_list]
        alt_seq = [to_one_letter_code[r.resname] for r in mobile.child_list]
        ref_seq.sort()
        alt_seq.sort()
        # Single-argument print calls emit the same text on Python 2 and 3
        print('REFERENCE %s' % (ref_seq,))
        print('ALTERNATIVE %s' % (alt_seq,))
        if ref_seq != alt_seq:
            raise ValueError("Sequences doesn't match")

    # Build paired lists of c-alpha atoms, ref_atoms and alt_atoms
    # Possibility to calculate RMSD on CA (ca), backbone (bb) or all atoms (all)
    ref_atoms, alt_atoms = self._get_atoms_coords(mobile, opt="ca")

    # Align these paired atom lists:
    super_imposer = Superimposer()
    super_imposer.set_atoms(ref_atoms, alt_atoms)
    print("RMS = %0.2f" % super_imposer.rms)
    return super_imposer.rms
def alignement(s1, s2):
    """
    Align structure s2 on structure s1 using all of their atoms.

    The new coordinates are written directly into the atoms of s2.
    """
    reference_atoms = list(s1.get_atoms())
    mobile_atoms = list(s2.get_atoms())

    fitter = Superimposer()
    fitter.set_atoms(reference_atoms, mobile_atoms)
    fitter.apply(mobile_atoms)
def superimpose(self):
    """
    Performs the superimposition.

    The transformation is computed from self.fixed and self.moved and
    applied to self.moved_atoms. The RMSD is stored in self.rmsd and
    returned.

    Raises ModernaSuperimposerError when there are no fixed or moved atoms,
    or no atoms to apply the transformation to.
    """
    have_both_sets = self.fixed and self.moved
    if not have_both_sets:
        raise ModernaSuperimposerError('There are no fixed or moved atoms. Can not calculate rotation and translation matrix.')
    if not self.moved_atoms:
        raise ModernaSuperimposerError('There are no atoms for superimposition given. Can not applay rotation and translation matrix')

    fitter = Superimposer()
    fitter.set_atoms(self.fixed, self.moved)
    fitter.apply(self.moved_atoms)
    self.rmsd = fitter.rms
    return self.rmsd
def get_alignment(self, mobile_, mobile_chain, reference_, reference_chain):
    """Superimpose one chain of a mobile entry onto a reference chain.

    Entries are read from ``<cwd>/pdb/pdb<id>.ent``. The residue mapping
    from self.align_sequences selects the CA pairs used for the fit. The
    moved chain is written to ``<mobile_>_<reference_>.ent`` in the current
    directory and the content of that file is returned.

    :param mobile_: id of the entry to be moved
    :param mobile_chain: chain id inside the mobile entry
    :param reference_: id of the reference entry
    :param reference_chain: chain id inside the reference entry
    :return: text of the written file
    :raises Exception: if a requested chain is missing
    """
    pdb_dir = os.path.join(os.getcwd(), 'pdb')
    mobile_path = os.path.join(pdb_dir, 'pdb' + mobile_ + '.ent')
    reference_path = os.path.join(pdb_dir, 'pdb' + reference_ + '.ent')

    # Parse structures & take only the necessary chain
    reference_structure = self.parse_structure(reference_path)
    try:
        reference = reference_structure[0][reference_chain]
    except KeyError:
        raise Exception(f"Chain {reference_chain} not found in reference.")

    mobile_structure = self.parse_structure(mobile_path)
    try:
        mobile = mobile_structure[0][mobile_chain]
    except KeyError:
        raise Exception(f"Chain {mobile_chain} not found in mobile.")

    # Align sequences to get mapping between residues
    mapping = self.align_sequences(reference, mobile)
    print(mapping)

    reference_cas = []
    mobile_cas = []
    for reference_id, mobile_id in mapping.items():
        reference_cas.append(reference[reference_id]["CA"])
        mobile_cas.append(mobile[mobile_id]["CA"])

    # Superimpose matching residues
    fitter = Superimposer()
    fitter.set_atoms(reference_cas, mobile_cas)
    fitter.apply(mobile.get_atoms())
    print(f"RMSD between structures: {fitter.rms:4.2f}")

    # Write aligned mobile, then read it back as text
    out_name = mobile_ + '_' + reference_ + ".ent"
    writer = PDBIO()
    writer.set_structure(mobile)
    writer.save(out_name)

    with open(out_name, "r+") as handle:
        return handle.read()
def build_complex(file_1, file_2):
    """
    Try to add one chain of a pairwise interaction to the complex.

    The structure in file_2 is superimposed onto the structure in file_1
    using the atoms returned by Compute_equal_chain. The chain of file_2
    that is not the shared chain is then tested with check_clash; when the
    check succeeds, file_1 is overwritten with the chains of the complex
    plus the moved chain and its chains are renamed.

    @ Input - Two file paths of PDB interactions (file_1 is the complex).
    @ Output - True if file_1 was rewritten with the added chain, False if
               the chain cannot be added.
    """
    parser = PDBParser(PERMISSIVE=1)
    structure_1 = parser.get_structure('Complex', file_1)
    structure_2 = parser.get_structure('Complex', file_2)
    sup = Superimposer()
    io = PDBIO()

    atoms_fixed, atoms_moving = Compute_equal_chain(structure_1, structure_2)
    try:
        sup.set_atoms(atoms_fixed, atoms_moving)
    except Exception:
        # Superposition impossible (e.g. atom lists of different size).
        # Exception instead of a bare except, so that KeyboardInterrupt and
        # SystemExit are no longer swallowed.
        return False
    sup.apply(list(structure_2.get_atoms()))

    # Id of the chain shared with the complex; loop-invariant
    shared_chain_id = list(atoms_moving)[0].get_full_id()[2]
    moved_chain = None
    for chain in structure_2[0].get_chains():
        if chain.id != shared_chain_id:
            moved_chain = chain
    if moved_chain is None:
        # file_2 has no chain besides the shared one: nothing to add
        return False

    if check_clash(structure_1, moved_chain):
        with open(file_1, "wt") as out_file:
            for model in list(structure_1.get_chains()) + [moved_chain]:
                io.set_structure(model)
                io.save(out_file)
        rename_complex_chains(file_1)
        return True
    return False
action='store_true', help='Write full alignment for each comparison') ap.add_argument('--max_cycles', default=30.0, type=float, help='Minimum sequence identity to consider a match') ap.add_argument('--min_identity', default=3, type=int, help='Maximum no. of cycles for RMSD calculation') cmd = ap.parse_args() # Bio.PDB classes parser = PDBParser(QUIET=1) io = PDBIO() super_imposer = Superimposer() # The Real Stuff # Read reference first refe_path = cmd.reference if cmd.reference else cmd.pdbf_list[0] print('[+] Matching structures to {0}'.format(refe_path)) reference = parse_structure(refe_path) # Iterate over others for pdbf in cmd.pdbf_list: mobile = parse_structure(pdbf) print('[+] Comparing structures: {0} vs {1}'.format( reference.id, mobile.id)) chain_mapping = match_chains(reference, mobile, cmd.min_identity, cmd.write_full_alignment) rmsd_info = match_structures(reference, mobile, chain_mapping,
res_195_struct_1 = struct_1[0]['A'][195]

# Residues 57, 102 and 195 of chain A in the second structure
res_57_struct_2, res_102_struct_2, res_195_struct_2 = (
    struct_2[0]['A'][number] for number in (57, 102, 195))

# Build 2 lists of atoms for calculating a rot.-trans. matrix
# (target and probe): all CA atoms first, then all N atoms.
backbone_names = ['CA', 'N']

target = []
for name in backbone_names:
    target.extend((res_57_struct_1[name],
                   res_102_struct_1[name],
                   res_195_struct_1[name]))

probe = []
for name in backbone_names:
    probe.extend((res_57_struct_2[name],
                  res_102_struct_2[name],
                  res_195_struct_2[name]))

# Both lists must have the same size to calculate a rot.-trans. matrix
assert len(target) == len(probe)

# Calculate the rotation-translation matrix.
sup = Superimposer()
sup.set_atoms(target, probe)

# Apply the matrix. It can only be applied to lists of atoms.
struct_2_atoms = list(struct_2.get_atoms())
sup.apply(struct_2_atoms)

# Write the rotation-translated structure
out = PDBIO()
out.set_structure(struct_2)
out.save('1FXY-superimposed.pdb')
def compare_structure(reference, alternate):
    """Superimpose `alternate` onto `reference` on three CA subsets.

    Both files are read from PDBdir and only chain A of the first model is
    used, restricted to residues whose name is a key of resdict. The
    alternate model is fitted on all CA atoms, then on the first 50, then
    on the first 150; after each fit it is moved in place and written with
    make_pdb_file.

    :param reference: file name of the reference structure inside PDBdir
    :param alternate: file name of the structure to be fitted inside PDBdir
    :return: (RMSD first 50, RMSD first 150, RMSD all, number of reference CA atoms)
    """
    parser = PDBParser()
    ref_struct = parser.get_structure("Reference",
                                      path.join(PDBdir, reference))
    alt_struct = parser.get_structure("Alternate",
                                      path.join(PDBdir, alternate))

    ref_chain = ref_struct[0]["A"]
    alt_model = alt_struct[0]
    alt_chain = alt_model["A"]

    ref_atoms = [res["CA"] for res in ref_chain if res.get_resname() in resdict]
    alt_atoms = [res["CA"] for res in alt_chain if res.get_resname() in resdict]

    # (number of leading CA atoms to fit on, output file prefix);
    # None selects every atom.
    fits = ((None, "Aligned_"), (50, "Aligned_50_"), (150, "Aligned_150_"))
    rms_values = []
    for limit, prefix in fits:
        fitter = Superimposer()
        fitter.set_atoms(ref_atoms[:limit], alt_atoms[:limit])
        fitter.apply(alt_model.get_atoms())
        make_pdb_file(alt_struct, prefix + alternate)
        rms_values.append(fitter.rms)

    full, f_50, f_150 = rms_values
    return f_50, f_150, full, len(ref_atoms)
def SuperimposeChains(final_files, temp_obj, PDB_bychain_objects, temp_chains):
    """
    Superimposes each target chain atoms to the corresponding template chain atoms.

    Every target structure is fitted on CA atoms, moved in place and saved
    as "<template id>_<n>_aligned.pdb". The files with n >= 1 are then
    appended to the n == 0 file, whose name is added to final_files.

    Arguments:

    final_files: list that receives the name of the merged output file.
    temp_obj: object of the current template.
    PDB_bychain_objects: list of PDB objects corresponding to each target chain.
    temp_chains: dictionary with the correspondencies of template-target chains.
    """
    ref_model = temp_obj[0]
    template_id = temp_obj.get_id()
    ppbuild = PPBuilder()
    template_chains = Selection.unfold_entities(temp_obj, 'C')

    def first_peptide_length(entity):
        # Sequence length of the first polypeptide built from the entity
        return len(ppbuild.build_peptides(entity)[0].get_sequence())

    min_len1 = min(first_peptide_length(chain) for chain in template_chains)
    min_len2 = min(first_peptide_length(obj) for obj in PDB_bychain_objects)
    # Residue numbers used for the fit, so that both atom lists are
    # limited to the same range.
    atoms_to_be_aligned = range(2, min(min_len1, min_len2))

    # Perform the superimposition for each target chain.
    written = 0
    for sample_structure in PDB_bychain_objects:
        sample_model = sample_structure[0]
        sample_id = sample_structure.get_id()

        # Template chain key matching this target; the same for every
        # template chain, so it is looked up once per target.
        # NOTE(review): if nothing matches, temp_ch keeps the value from the
        # previous target (or is undefined on the first one) - confirm that
        # temp_chains always has an entry for every target.
        for key, val in temp_chains.items():
            if val == sample_id:
                if GeneralFunctions.GetNameWOChain(key) == template_id:
                    temp_ch = key

        ref_atoms = []
        for ref_chain in ref_model:
            if template_id + "_" + ref_chain.get_id() == temp_ch:
                for ref_res in ref_chain:
                    if ref_res.get_id()[1] in atoms_to_be_aligned:
                        ref_atoms.append(ref_res['CA'])  # C-alpha only

        sample_atoms = []
        for sample_chain in sample_model:
            for sample_res in sample_chain:
                if sample_res.get_id()[1] in atoms_to_be_aligned:
                    sample_atoms.append(sample_res['CA'])  # C-alpha only

        # Superimpose.
        super_imposer = Superimposer()
        super_imposer.set_atoms(ref_atoms, sample_atoms)
        rotation, translation = super_imposer.rotran

        # Apply rotation and translation.
        for atom in sample_structure.get_atoms():
            atom.transform(rotation, translation)

        # Create a PDB file to save the new coordinates.
        io = PDBIO()
        io.set_structure(sample_structure)
        io.save(template_id + "_" + str(written) + "_aligned.pdb",
                write_end=False)
        written += 1

    # Append each chain to a unique file. Both handles are closed on exit,
    # which the previous bare open() calls never did.
    merged_name = template_id + "_0_aligned.pdb"
    with open(merged_name, 'a') as merged:
        final_files.append(merged_name)
        for index in range(1, written):
            part_name = template_id + "_" + str(index) + "_aligned.pdb"
            with open(part_name) as part:
                for line in part:
                    merged.write(line)
def SuperimposeStructures(object_list, complex, RMSD_threshold):
    """
    Superimposes chains from objects in object_list to chains in complex.

    The first structure containing the core chain is copied and used as
    reference. Every structure containing the core chain is superimposed
    onto it through that chain; when the RMSD is below the threshold, its
    other chains are added to the complex if they do not clash. Structures
    that were superimposed are removed from object_list.

    Returns the complex with the new added chains, and the updated
    object_list.

    Arguments:

    -object_list : list of PDB objects that have to be superimposed and added to the complex.
    -complex: main structure to which individual chains from the object_list have to be added after superimposition.
    -RMSD_threshold: threshold for the RMSD value of the superposition between a chain of an object an the same chain on the complex. Default value for the program is 0.5.
    """
    # Get core chain to start reconstruction
    core = FindCoreChain(object_list)

    if options.verbose:
        sys.stderr.write("Chain defined as core to superimpose: %s\n" % (core))
        sys.stderr.write("Added to the final complex:\n")

    # Declare Superimpose object
    sup = Superimposer()

    ref_struct = None
    # Iterate over a copy: structures are removed from object_list below
    for structure in list(object_list):
        # Select the first structure with the core chain to be the reference
        try:
            if core in structure[0] and not ref_struct:
                ref_struct = copy.deepcopy(structure)
                complex.add(ref_struct[0])
        except Exception:
            # Best effort, as before, but no longer a bare except that also
            # swallows KeyboardInterrupt and SystemExit.
            pass

        # If the structure contains the core chain, superimpose that to the
        # chain with same name in the reference structure set before
        if core in structure[0] and (structure is not ref_struct):
            sup.set_atoms(unfold_entities(ref_struct[0][core], 'A'),
                          unfold_entities(structure[0][core], 'A'))
            sup.apply(structure[0])
            RMSD = float(sup.rms)
            print(RMSD)

            # Check for clashes before adding new chain to complex
            if RMSD < RMSD_threshold:
                for chain in structure[0]:
                    if chain.get_id() != core:
                        if not CheckClashes(complex, chain):
                            chain_copy = copy.deepcopy(chain)
                            # Rename the copy (starting at 'A') until its id
                            # is not used by any chain of the complex
                            N = 65
                            while chain_copy.get_id() in [
                                    a.get_id() for a in complex.get_chains()
                            ]:
                                try:
                                    chain_copy.id = chr(N)
                                except ValueError:
                                    pass
                                N += 1
                            complex[0].add(chain_copy)
                            if options.verbose:
                                sys.stderr.write("\tChain %s\n" % (chain.id))
            object_list.remove(structure)

    return (complex, object_list)
def compare_structure(reference, alternate):
    """Fit `alternate` onto `reference` on all, the first 50 and the first 150 CA atoms.

    Structures are loaded from PDBdir; chain A of the first model provides
    the CA atoms (residues whose name is a key of resdict only). After each
    fit the alternate model is transformed in place and saved through
    make_pdb_file.

    Returns (RMSD on first 50, RMSD on first 150, RMSD on all, number of
    reference CA atoms).
    """
    parser = PDBParser()
    ref_struct = parser.get_structure('Reference',
                                      path.join(PDBdir, reference))
    alt_struct = parser.get_structure("Alternate",
                                      path.join(PDBdir, alternate))
    ref_model = ref_struct[0]
    ref_chain = ref_model['A']
    alt_model = alt_struct[0]
    alt_chain = alt_model['A']

    def ca_atoms(chain):
        # CA atom of every residue known to resdict, in chain order
        selected = []
        for residue in chain:
            if residue.get_resname() in resdict:
                selected.append(residue['CA'])
        return selected

    def fit_and_save(fixed, moving, file_name):
        # Fit, move the whole alternate model, write it, report the RMSD
        fitter = Superimposer()
        fitter.set_atoms(fixed, moving)
        fitter.apply(alt_model.get_atoms())
        make_pdb_file(alt_struct, file_name)
        return fitter.rms

    ref_atoms = ca_atoms(ref_chain)
    alt_atoms = ca_atoms(alt_chain)

    full = fit_and_save(ref_atoms, alt_atoms, "Aligned_" + alternate)
    f_50 = fit_and_save(ref_atoms[:50], alt_atoms[:50],
                        "Aligned_50_" + alternate)
    f_150 = fit_and_save(ref_atoms[:150], alt_atoms[:150],
                         "Aligned_150_" + alternate)
    return f_50, f_150, full, len(ref_atoms)
def compute_median_rmsd(self, k):
    """
    Compute a per-residue RMSD profile of one ped against its first model.

    For every model after the first, each window of 9 consecutive CA atoms
    is superimposed onto the matching window of the first model and the
    deviation of each atom is recorded. The deviations are averaged over
    the models and then over the overlapping windows.
    NOTE: despite the name, the values are combined with np.average.

    :param k: index into self._ped_ids
    :return: vector with one value per residue position
    """
    window_size = 9
    super_imposer = Superimposer()

    # Get the current structure
    ped_id = self._ped_ids[k]
    structure = PDBParser(QUIET=True).get_structure(
        ped_id, "{}/{}.pdb".format(self._data_folder, ped_id))

    def ca_atoms(model):
        return [atom for atom in model.get_atoms() if atom.get_name() == "CA"]

    ref_model = ca_atoms(structure[0])

    per_model = []  # no_models X no_fragments X fragment_size
    for index, model in enumerate(structure):
        if index == 0:
            continue  # the first model is the reference
        alt_model = ca_atoms(model)

        fragments = []  # no_fragments X fragment_size
        for start in range(len(ref_model) - window_size):
            stop = start + window_size
            ref_fragment = ref_model[start:stop]
            alt_fragment = alt_model[start:stop]

            # Calculate rotation/translation matrices
            super_imposer.set_atoms(ref_fragment, alt_fragment)
            rotation, translation = super_imposer.rotran

            # Rotate-translate a copy of the coordinates
            alt_xyz = np.array([atom.get_coord() for atom in alt_fragment])
            alt_xyz = np.dot(rotation.T, alt_xyz.T).T
            alt_xyz = alt_xyz + translation

            # Deviation of each residue of the fragment
            ref_xyz = np.array([atom.get_coord() for atom in ref_fragment])
            delta = ref_xyz - alt_xyz
            fragments.append(np.sqrt(np.sum(delta * delta, axis=1)))
        per_model.append(fragments)

    # Average over the models: no_fragments X fragment_size
    profile = np.average(np.array(per_model), axis=0)

    # Pad every row on the right with one zero column per fragment, then
    # shift row i by i positions so that columns line up with residues.
    profile = np.pad(profile, ((0, 0), (0, profile.shape[0])))
    for shift, row in enumerate(profile):
        profile[shift] = np.roll(row, shift)

    # Average along columns of overlapping fragments
    return np.average(profile, axis=0)
def test_Superimposer(self):
    """Superimpose a structure onto a translated copy of itself.

    The same PDB file is parsed twice; the second copy is shifted by
    (1, 2, 3) without rotation. The fit must report an identity rotation,
    the opposite translation and an RMS of about zero, and applying it must
    leave the order of the atom elements unchanged.
    """
    pdb_path = "PDB/1A8O.pdb"
    parser = PDBParser()
    fixed_structure = parser.get_structure("FIXED", pdb_path)
    fixed = Selection.unfold_entities(fixed_structure, "A")
    moving_structure = parser.get_structure("MOVING", pdb_path)
    moving = Selection.unfold_entities(moving_structure, "A")

    # Displace the moving copy by a pure translation.
    identity_rotation = numpy.identity(3).astype('f')
    shift = numpy.array((1.0, 2.0, 3.0), 'f')
    for atom in moving:
        atom.transform(identity_rotation, shift)

    sup = Superimposer()
    sup.set_atoms(fixed, moving)
    fitted_rotation, fitted_translation = sup.rotran[0], sup.rotran[1]
    self.assertTrue(numpy.allclose(fitted_rotation, numpy.identity(3)))
    self.assertTrue(
        numpy.allclose(fitted_translation, numpy.array([-1.0, -2.0, -3.0])))
    self.assertAlmostEqual(sup.rms, 0.0, places=3)

    # Expected element of every atom, in file order.
    atom_list = [
        'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'C', 'C', 'C', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'N', 'C', 'N', 'N',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N', 'C', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'S',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C', 'N',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C',
        'N', 'C', 'C', 'O',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'O',
        'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C',
        'N', 'C', 'C', 'O', 'C', 'C', 'SE', 'C',
        'N', 'C', 'C', 'O', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C',
        'N', 'C', 'C', 'O', 'C', 'S',
        'N', 'C', 'C', 'O', 'C', 'C', 'C', 'O', 'N',
        'N', 'C', 'C', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
        'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O',
    ]

    # Move the atoms back and verify that their order is unchanged.
    sup.apply(moving)
    atom_moved = [atom.element for atom in moving]
    self.assertEqual(atom_moved, atom_list)
def test_Superimposer(self):
    """Superimpose a structure onto a translated copy of itself.

    The same PDB file is parsed twice; the second copy is shifted by
    (1, 2, 3) without rotation. The fit must report an identity rotation,
    the opposite translation and an RMS of about zero, and applying it must
    leave the order of the atom elements unchanged.
    """
    pdb_path = "PDB/1A8O.pdb"
    parser = PDBParser()
    fixed_structure = parser.get_structure("FIXED", pdb_path)
    fixed = Selection.unfold_entities(fixed_structure, "A")
    moving_structure = parser.get_structure("MOVING", pdb_path)
    moving = Selection.unfold_entities(moving_structure, "A")

    # Displace the moving copy by a pure translation.
    identity_rotation = numpy.identity(3).astype("f")
    shift = numpy.array((1.0, 2.0, 3.0), "f")
    for atom in moving:
        atom.transform(identity_rotation, shift)

    sup = Superimposer()
    sup.set_atoms(fixed, moving)
    fitted_rotation, fitted_translation = sup.rotran[0], sup.rotran[1]
    self.assertTrue(numpy.allclose(fitted_rotation, numpy.identity(3)))
    self.assertTrue(
        numpy.allclose(fitted_translation, numpy.array([-1.0, -2.0, -3.0])))
    self.assertAlmostEqual(sup.rms, 0.0, places=3)

    # Expected element of every atom, in file order.
    # Turn black code style off
    # fmt: off
    atom_list = [
        "N", "C", "C", "O", "C", "C", "SE", "C",
        "N", "C", "C", "O", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "N", "C", "N", "N",
        "N", "C", "C", "O", "C", "C", "C", "O", "N",
        "N", "C", "C", "O",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C", "N",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "N", "C", "N", "N",
        "N", "C", "C", "O", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C", "C", "C", "C", "C", "O",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C", "N", "C", "N", "N",
        "N", "C", "C", "O", "C", "C", "C", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C", "C", "C", "C", "O",
        "N", "C", "C", "O", "C", "C", "C", "C", "N",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "N", "C", "N", "N",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C", "O", "N",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "O",
        "N", "C", "C", "O", "C", "C", "C", "O", "N",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C", "N",
        "N", "C", "C", "O", "C", "C", "O", "N",
        "N", "C", "C", "O", "C", "C", "C", "C", "N", "C", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "SE", "C",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "O", "N",
        "N", "C", "C", "O", "C", "C", "O", "N",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "O", "N",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "S",
        "N", "C", "C", "O", "C", "C", "C", "C", "N",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "C", "N",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O",
        "N", "C", "C", "O", "C", "C", "C",
        "N", "C", "C", "O",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C", "C", "C", "C",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "C", "O", "O",
        "N", "C", "C", "O", "C", "C", "SE", "C",
        "N", "C", "C", "O", "C", "C", "SE", "C",
        "N", "C", "C", "O", "C", "O", "C",
        "N", "C", "C", "O", "C",
        "N", "C", "C", "O", "C", "S",
        "N", "C", "C", "O", "C", "C", "C", "O", "N",
        "N", "C", "C", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O", "O",
        "O", "O", "O", "O", "O", "O", "O", "O", "O",
    ]
    # Turn black code style on
    # fmt: on

    # Move the atoms back and verify that their order is unchanged.
    sup.apply(moving)
    atom_moved = [atom.element for atom in moving]
    self.assertEqual(atom_moved, atom_list)
from Bio.PDB import PDBParser, PDBIO, Superimposer
from Bio.PDB.Structure import Structure
from copy import deepcopy

# Parse the two input structures from their PDB files.
parser = PDBParser()
initial = parser.get_structure("initial", "../PDBFiles/1N3W_ALIGNED_1PEB.pdb")
final = parser.get_structure("final", "../PDBFiles/1PEB_ALIGNED_1N3W.pdb")
# New, empty structure named "result"; nothing in the visible code fills it.
result = Structure("result")

# Fit `final` onto `initial`: set_atoms(fixed, moving) computes the
# transformation and apply() moves the atoms of `final` in place.
# NOTE(review): atoms are paired by position, so both structures presumably
# contain the same atoms in the same order - confirm for these two files.
sup = Superimposer()
sup.set_atoms([atom for atom in initial.get_atoms()],
              [atom for atom in final.get_atoms()])
sup.apply([atom for atom in final.get_atoms()])


def interpolate(initial, final, steps, startEndInclusive):
    # Generator function (it contains `yield`). The parameters `initial` and
    # `final` shadow the module-level structures of the same name.
    # NOTE(review): only the beginning of this function is visible here; the
    # code that uses xDistance and yields the interpolated models is missing.
    if startEndInclusive:
        # Emit the unmodified starting point first.
        yield initial
    # Work on a copy so that `initial` itself is not modified through newModel.
    newModel = deepcopy(initial)
    for index in range(1, steps + 1):
        newModel.id = index
        # Walk the three hierarchies in lockstep, level by level.
        # NOTE(review): if Structure objects are passed in, iterating them
        # yields models, so `chain` would actually be a model - confirm
        # against the caller.
        for chain, chain2, chain3 in zip(initial, final, newModel):
            for residue, residue2, residue3 in zip(chain, chain2, chain3):
                for atom, atom2, atom3 in zip(residue, residue2, residue3):
                    # The distances between each point's coordinates in Cartesian space
                    xDistance = abs(atom.coord[0] - atom2.coord[0])
def calcRMSD(self, fixed_atoms, moving_atoms):
    """Return the RMS reported by ``Superimposer`` for two atom lists.

    :param fixed_atoms: atoms used as the fixed set
    :param moving_atoms: atoms fitted onto ``fixed_atoms``
    :return: the ``rms`` attribute of the superimposer after the fit
    """
    superimposer = Superimposer()
    # Only the fit is computed; apply() is never called, so the atoms passed
    # in are not moved by this method.
    superimposer.set_atoms(fixed_atoms, moving_atoms)
    rmsd = superimposer.rms
    return rmsd