def get_FUZZLE_hhs(domain):
    """
    :param domain: str. The domain to download from Fuzzle as hhs
    :return: filepath: path where the file is located.
    """
    logger.info(f'Attempting to download hhs file for {domain} from the FUZZLE server')
    url = f'https://fuzzle.uni-bayreuth.de/hhs/scop95_2.07.psi.hhs/{domain}.hhs'

    # Retry until the FUZZLE server responds
    connected = False
    while not connected:
        try:
            response = urllib.request.urlopen(url)
            text = response.read()
        except Exception as e:
            import time
            logger.warning(f'Failed to connect to FUZZLE with error {e}. Sleeping 5s and retrying.')
            time.sleep(5)
            continue
        connected = True

    filepath = string_to_tempfile(text.decode('ascii'), 'hhs')
    logger.info(f"File downloaded as {filepath}")
    return filepath
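# Hedged usage sketch (not part of the original module): download the HHsearch
# profile of a single SCOP95 domain from FUZZLE. The domain identifier below is
# illustrative only.
#
#   hhs_path = get_FUZZLE_hhs('d12asa_')
#   with open(hhs_path) as fh:
#       print(fh.readline())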
def _domain_from_PDB(pdb: str):
    """
    Returns all domains included in a SCOP version that come from a PDB.

    :param pdb: A 4-letter pdb code
    :return: A list of domains that this PDB contains
    """
    pdb = pdb.lower()
    cur.execute(f"SELECT sdomain FROM scop_pdbref_scop WHERE pdbref like '{pdb}'")
    hits = cur.fetchall()
    domains = tuple(np.unique([x[0] for x in hits]))

    if domains:
        if len(domains) == 1:
            # A one-element tuple would render as "('xxx',)", which is not valid SQL
            domains = str("('" + domains[0] + "')")
        cur.execute(f"SELECT ref FROM astral_astral95 WHERE id in {domains}")
        hits = cur.fetchall()
        domains = np.unique([x[0] for x in hits])
        logger.info(f"Domain(s) {domains} are present in the Fuzzle database"
                    f" as representatives for pdb {pdb.capitalize()}")
    else:
        from protlego.builder.builder import NotCorrectPDBError
        raise NotCorrectPDBError(
            f"PDB code {pdb.capitalize()} does not yet appear in the SCOP database")
    return domains
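# Hedged usage sketch (illustrative only): list the SCOP95 representative domains
# contained in a PDB entry. The PDB code is an example, and the call assumes the
# module-level database cursor `cur` is already connected.
#
#   domains = _domain_from_PDB('1crn')
#   print(domains)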
def unique_clusters(self):
    """ Returns a list of the unique clusters present in the hits """
    unique_clusters = np.unique([(hit.q_cluster, hit.s_cluster) for hit in self.hits])
    logger.info(f"The query contains {len(unique_clusters)} different clusters")
    return unique_clusters
def unique_domains(self):
    """ Returns a list of the unique domains present in the hits """
    unique_domains = np.unique([(hit.query, hit.sbjct) for hit in self.hits])
    logger.info(f"The query contains {len(unique_domains)} different domains")
    return unique_domains
def _superimpose_chunk(self, qpairs: list, spairs: list, qmol: Molecule, smol: Molecule) -> Tuple[Molecule, np.ndarray]:
    """
    Superimposes the part of the total sequence alignment defined by the indexes qpairs/spairs

    :param qpairs: list of indexes to align from the pdb query
    :param spairs: list of indexes to align from the pdb subject
    :param qmol: the query pdb
    :param smol: the subject pdb
    :return: smol: the subject molecule aligned.
             distances: a list of the distances between the alpha carbons after alignment.
    """
    # Copying because we are going to cut the pdbs into the chunks
    copyq = qmol.copy()
    copys = smol.copy()
    copyq.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, qpairs)))
    copys.filter('protein and backbone and same residue as index %s' % ' '.join(map(str, spairs)))
    copyq.write('/tmp/copyq.pdb')
    copys.write('/tmp/copys.pdb')

    # Matrix for VMD
    try:
        # We align subject onto the query
        tm_matrix = get_tmalign_output('/tmp/copys.pdb', '/tmp/copyq.pdb', "matrix")
    except Exception as e:
        raise ChildProcessError(f"TMalign cannot align the PDBs. Error follows: {e}")
    vectran, matrot = tm2vmd(tm_matrix)

    # Remove the temporary chunk files
    os.remove('/tmp/copyq.pdb')
    os.remove('/tmp/copys.pdb')

    # Align the whole subject domain and fragment.
    # Copying so that the original smol does not lose the origin of coordinates
    s1mol = smol.copy()
    s1mol.rotateBy(matrot)
    s1mol.moveBy(vectran)
    copys.rotateBy(matrot)
    copys.moveBy(vectran)

    # Compute RMSD for the fragments
    rmsd = MetricRmsd(copyq, 'protein and name CA', pbc=False)
    data = rmsd.project(copys)
    logger.info(f"The RMSD between the fragments is {data} over {len(spairs)} alpha carbons")

    # Compute distances between the two selections
    bbq = copyq.get("coords", sel="protein and name CA")
    bbs = copys.get("coords", sel="protein and name CA")
    distances = np.diagonal(cdist(bbq, bbs))

    return s1mol, distances
def fragments(self):
    """
    Computes the connected components of the graph, stores the number of fragments
    and logs it.

    :return: the graph-tool property map labelling the component each vertex belongs to
    """
    if not self.graph:
        logger.info(" You need to create a network first before computing its sizes."
                    " Call create_network(). Exiting...")
        return
    self.comp, hist = label_components(self.graph)
    self.numFrags = max(self.comp.a) + 1
    logger.info(f"There are {self.numFrags} fragments")
    return self.comp
def find_nonstandards(pdb: Molecule) -> list:
    """
    Finds non-standard amino acids

    :param pdb: Molecule or Chimera object in which to find non-standard amino acids.
    :return: list of non-standard residues
    """
    non_standards = [aa for aa in np.unique(pdb.resname)
                     if (aa in aa_keys or aa not in standard_aas)]
    if non_standards:
        for i in non_standards:
            if i != 'UNK':
                logger.info(f"Found the following non-standard residue: {i}. "
                            f"Preserving it in the original PDB")
            else:
                logger.warning("Protein presents unknown residue UNK."
                               " Call remove_residue() to remove it or provide parameters"
                               " if you want to minimize it with AMBER or CHARMM.")
    return non_standards
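# Hedged usage sketch (illustrative only): check a structure for non-standard
# residues before building. The file name is an assumption for the example, and
# loading via Chimera(path) follows the constructor usage shown further below.
#
#   mol = Chimera('my_structure.pdb')
#   if find_nonstandards(mol):
#       print("Structure contains non-standard residues")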
def superimpose_structures(self, aln: HHpredHitAlignment, partial_alignment: bool = False):
    """
    Moves the two molecules to the origin of coordinates, aligns the full structures
    according to the fragment and obtains RMSDs and distances.

    :param aln: the HHpred alignment between query and subject
    :param partial_alignment: if True, align each chunk of the alignment separately;
        otherwise perform a single global alignment
    :return: the dictionaries of aligned query and subject structures
    """
    self._get_pairs(aln)

    # Re-align if the command is called twice
    if self.qaPDB:
        self.qaPDB = {}
        self.saPDB = {}
        self.dst = []

    if partial_alignment is False:
        qpairs = self.global_qpairs
        spairs = self.global_spairs
    else:
        qpairs = self.qpairs
        spairs = self.spairs

    # Inform the user if a partial alignment was requested but there is only one chunk
    if len(qpairs) == 1 and partial_alignment is True:
        logger.info("The sequence alignment only contains one chunk. Performing global alignment")

    # We only need one query, centered at the origin of coordinates.
    # It is the subject that aligns to this template.
    qmol = self.qPDB.copy()
    smol = self.sPDB.copy()
    qmol.center()
    smol.center()
    self.qaPDB[0] = qmol.copy()

    for index, qpair_chunk in enumerate(qpairs):
        logger.info(f"Performing alignment {index + 1} with TMalign")
        saPDB, distance = self._superimpose_chunk(qpair_chunk, spairs[index], qmol, smol)
        self.dst.append(distance)
        self.saPDB[index] = saPDB.copy()

    self.global_dst = [[item for chunk in self.dst for item in chunk]]
    return self.qaPDB, self.saPDB
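# Hedged usage sketch (illustrative only): align the subject onto the query both
# globally and chunk by chunk. `builder` and `aln` are assumed to be an instance
# of this class and an HHpredHitAlignment obtained elsewhere.
#
#   qa, sa = builder.superimpose_structures(aln)                          # global
#   qa, sa = builder.superimpose_structures(aln, partial_alignment=True)  # per chunk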
def compute_hydrophobic_clusters(self, chain: str = 'A',
                                 sel: str = "protein and not backbone and noh and resname ILE VAL LEU",
                                 cutoff_area: float = 10):
    """
    :param chain: Chain(s) in the PDB on which to compute the hydrophobic clusters.
        Examples: "A", "A B C". Default: "A"
    :param sel: VMD selection on which to compute the clusters. Default is every
        sidechain heavy atom of ILE, VAL and LEU residues:
        "protein and not backbone and noh and resname ILE VAL LEU"
    :param cutoff_area: contact-area cutoff passed to create_graph() to decide which
        residue pairs are connected. Default: 10
    :return: A representation for each cluster
    """
    clusters = None

    # Remove previous visualizations
    for index, rep in reversed(list(enumerate(self.reps.replist))):
        self.reps.remove(index)

    resids = np.unique(self.get("resid", sel=f"{sel} and chain {chain}"))
    dims = len(resids)
    indices = self.get("index", sel=f"{sel} and chain {chain}")
    dims_indices = len(self.get("index", sel=f"protein and chain {chain}"))

    logger.info("Initializing final output")
    contacts = np.zeros((dims, dims))
    atoms_to_atoms = np.zeros((dims_indices, dims_indices))

    logger.info("Computing clusters")
    for index in indices:
        a = Atom(index, self)
        if not a.neighbor_indices.any():
            continue
        _, contacts = fill_matrices(a, self, atoms_to_atoms, contacts, indices, resids)

    graph = create_graph(contacts, resids, cutoff_area=cutoff_area)
    comp, _ = label_components(graph)
    if comp.a.any():
        clusters = add_clusters(self, graph, comp)
    else:
        logger.warning("There are no residues in contact for this selection")

    return clusters
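# Hedged usage sketch (illustrative only): compute clusters on chain A with a
# stricter contact-area cutoff. `chimera` is assumed to be an instance of this
# class loaded from a PDB.
#
#   clusters = chimera.compute_hydrophobic_clusters(chain="A", cutoff_area=15)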
def __init__(self, hit: Hit):
    qpdb_path = get_SCOP_domain(hit.query)
    spdb_path = get_SCOP_domain(hit.sbjct)

    logger.info(f'Loading {qpdb_path} as a chimera object')
    self.qPDB = Chimera(qpdb_path, validateElements=False)
    os.remove(qpdb_path)
    if self.qPDB.numFrames > 1:
        self.qPDB.dropFrames(keep=0)
        logger.info("Query protein contains more than one model. Keeping only the first one")

    logger.info(f'Loading {spdb_path} as a chimera object')
    self.sPDB = Chimera(spdb_path, validateElements=False)
    os.remove(spdb_path)
    if self.sPDB.numFrames > 1:
        self.sPDB.dropFrames(keep=0)
        logger.info("Subject protein contains more than one model. Keeping only the first one")

    self.qaPDB, self.saPDB = {}, {}
    self.qpairs, self.spairs = [], []
    self.dst = []
    self.chim_positions = {}