Exemplo n.º 1
0
    def computeScore(self, name_score: str, *score_info):
        """Compute one score and store it in the ``rescoring`` attribute.

        Args:
            name_score (str): Name of the score. Used as the key under which the result is stored in ``self.rescoring``.
            *score_info: Stored informations about the score.

                * score_info[0] (obligatory): global type of the score, "contacts" or "residues".
                * score_info[1] (obligatory): function to compute the score. It is called with ``self.contacts`` for a "contacts" score, and with ``self.residues_interface`` and score_info[2] for a "residues" score.
                * score_info[2] (obligatory for "residues"): list of entities to compute with (["ligand"], ["receptor"] or both ["ligand", "receptor"]).

        Raises:
            error.InvalidArgument: Raise if score_info does not hold the type and the function of the score, or if the roles are missing for a "residues" score.
            error.InvalidScore: Raise if global type of the score is invalid
        """
        # Check the arity before indexing, so an incomplete score definition is
        # reported as a definition error instead of a bare IndexError.
        if len(score_info) < 2:
            raise error.InvalidArgument(
                f"Error in Frequencies.available_scores definitions. Need type and function arguments for score {name_score}"
            )

        type_score, score_fn = score_info[0], score_info[1]

        if type_score == "contacts":
            score = score_fn(self.contacts)
        elif type_score == "residues":
            # Residue scores need a third element: the roles to compute with.
            if len(score_info) <= 2:
                raise error.InvalidArgument(
                    f"Error in Frequencies.available_scores definitions. Need roles argument for score {name_score}"
                )
            score = score_fn(self.residues_interface, score_info[2])
        else:
            raise error.InvalidScore(
                f"{type_score} is invalid. Choose contacts or residues")

        self.rescoring[name_score] = score
Exemplo n.º 2
0
    def serializeRescoring(self, output_file: str, type_score: List[str] = ["residue_sum", "residue_average", "residue_log_sum", "residue_square_sum", "CONSRANK_U", "CONSRANK", "contact_log_sum", "contact_square_sum"]):
        """Write rescoring results in a file.

        Args:
            output_file (str): File to write results. The first line is a comment that resumes the number of poses used. The second line is the header with pose index followed by the names of the written scores. The next lines are the scores for each pose.
            type_score (List[str], optional): List of scores to write. The scores will be written in the given order. The string "all" is also accepted and selects every score of ``self.freq.available_scores``. Defaults to ["residue_sum", "residue_average", "residue_log_sum", "residue_square_sum", "CONSRANK_U", "CONSRANK", "contact_log_sum", "contact_square_sum"].

        Raises:
            error.InvalidScore: Raise if a requested score is not in ``self.freq.available_scores``.
            error.RescoringNotComputed: Raise if no pose has been rescored yet.
        """
        # NOTE: the default list is only read, never mutated, so sharing it
        # between calls is harmless here.
        if type_score == "all":
            scores_to_write = self.freq.available_scores
        else:
            scores_to_write = {}
            for score in type_score:
                if score not in self.freq.available_scores:
                    raise error.InvalidScore(f"{score} score is not valid")
                scores_to_write[score] = self.freq.available_scores[score]

        if not self._nb_rescored_poses:
            raise error.RescoringNotComputed(
                "Poses has not been rescored, call rescorePoses")

        # The context manager closes the file even when a pose fails to
        # serialize one of the requested scores.
        with open(output_file, "w") as out:
            out.write(
                f"#Rescoring of {self._nb_rescored_poses} poses with frequencies computed on {self.freq.nb_poses_used} poses.\n")
            # Joining a dict iterates over its keys, i.e. the score names.
            out.write("#Pose\t" + "\t".join(scores_to_write) + "\n")
            for p in self.rescored_poses:
                out.write(f"{p.index}{p.serializeScores(scores_to_write)}\n")

        logging.info(f"Scores writes to {output_file}")
        logging.info(f"Scores writes to {output_file}")
Exemplo n.º 3
0
    def getScore(self, score) -> float:
        """Return the stored value of one score.

        Args:
            score: Name of the score, looked up as a key of ``self.rescoring``.

        Raises:
            error.InvalidScore: Raise if the score is not a key of ``self.rescoring`` (invalid or not computed).

        Returns:
            float: score value
        """
        computed = self.rescoring
        if score in computed:
            return computed[score]

        raise error.InvalidScore(
            f"{score} score is invalid or not computed")
Exemplo n.º 4
0
    def serializeScores(self, scores: List[str]) -> str:
        """Build the text representation of the requested scores.

        Args:
            scores (List[str]): Names of the scores to represent, in output order.

        Raises:
            error.InvalidScore: Raise if a requested score is not a key of ``self.rescoring`` (invalid or not computed).

        Returns:
            str: Every requested score value, each one preceded by a tab character, in the order given in scores. Empty string if scores is empty.

        """
        pieces = []
        for name in scores:
            try:
                piece = f"\t{self.rescoring[name]}"
            except KeyError:
                raise error.InvalidScore(
                    f"{name} score is invalid or not computed")
            pieces.append(piece)

        return "".join(pieces)
Exemplo n.º 5
0
    def getRankedClusterRepresentatives(self, ranked_by: str) -> List[Tuple[int, 'DockingPP.pose.Pose']]:
        """Get clusters representatives in decreasing order of given score

        Args:
            ranked_by (str): Score used as key of ``self.clusters``. The representatives are returned in the iteration order of ``self.clusters[ranked_by]``.

        Raises:
            error.ClustersNotComputed : Raise when clusters are not computed
            error.InvalidScore: Raise if score is not computed or invalid

        Returns:
            List[Tuple[int, DockingPP.pose.Pose]]: List of tuple where first element is the cluster number (starting at 0) and second is the representative pose.

        Examples:
            For 1BJ1, get CONSRANK_U clusters representatives and display the first 2 indexes

            >>> representatives = DH.getRankedClusterRepresentatives("CONSRANK_U")
            >>> for clust in representatives[:2]:
            >>>       print("cluster", clust[0], "representative index", clust[1].index)
            cluster 0 representative index 16
            cluster 1 representative index 74

        """
        if not self.clusters:
            raise error.ClustersNotComputed(
                "Clusters have not been computed. Call clusterPoses first")

        if ranked_by not in self.clusters:
            raise error.InvalidScore(
                f"{ranked_by} cluster is invalid or not computed")

        # Number the representatives from 0 in iteration order.
        return list(enumerate(self.clusters[ranked_by]))
Exemplo n.º 6
0
    def rescorePoses(self, nb_poses: int, type_score: str):
        """Rescore N poses according to given type_score. A new score will be computed for each pose

        Args:
            nb_poses (int): Number of poses to rescore
            type_score (str): Score to use

                Available type score
                    * CONSRANK_U : sum of relative frequencies of each contact of the pose
                    * CONSRANK : CONSRANK_U normalised by number of contacts in the pose
                    * contact_log_sum : sum of log of relative frequencies of each contact of the pose
                    * contact_square_sum : sum of square of relative frequencies of each contact of the pose
                    * residue_sum : sum of relative frequencies of each interface residue (ligand and receptor) of the pose
                    * residue_sum_ligand : sum of relative frequencies for ligand interface residues
                    * residue_sum_receptor : sum of relative frequencies for receptor interface residues
                    * residue_average : residues_sum normalised by number of interface residues in the pose
                    * residue_average_ligand : residues_sum normalised by number of interface residues in the pose, just for ligand residues
                    * residue_average_receptor : residues_sum normalised by number of interface residues in the pose, just for receptor residues
                    * residue_log_sum : sum of log of relative frequencies of each interface residue of the pose
                    * residue_square_sum : sum of square of relative frequencies of each interface residue of the pose
                    * all : to compute all above scores

        Raises:
            error.IncompatiblePoseNumber: Raise if you try to rescore more poses than poses with contact map
            error.FrequenciesNotComputed: Raise if frequencies have not been computed.
            error.InvalidScore: Raise if you give an invalid type score.

        Examples:
            For 1BJ1, rescore 2000 poses with all scores and display the first 2

            >>> DH.rescorePoses(2000, type_score = "all")
            >>> for p in DH.poses[:2]:
            >>>     print(p.index, p.rescoring)
            1 {'CONSRANK_U': 38.180000000000014, 'CONSRANK': 0.4061702127659576, 'contact_log_sum': -94.55436888131206, 'contact_square_sum': 17.90280000000001, 'residue_sum': 42.06, 'residue_average': 0.7378947368421053, 'residue_log_sum': -20.625622826512405, 'residues_square_sum': 33.8476}
            2 {'CONSRANK_U': 38.00000000000001, 'CONSRANK': 0.44186046511627913, 'contact_log_sum': -75.18567402702058, 'contact_square_sum': 18.285600000000002, 'residue_sum': 40.12, 'residue_average': 0.7569811320754717, 'residue_log_sum': -17.346104879674645, 'residues_square_sum': 32.776}

        """
        logging.info(
            f'== Rescore poses ==\nNumber of poses : {nb_poses}\n Scores : {type_score}')

        if nb_poses > self._nb_cmap_poses:
            raise error.IncompatiblePoseNumber(
                f"Impossible to rescore {nb_poses} poses, only {self._nb_cmap_poses} have contact map")

        if not self.freq:
            raise error.FrequenciesNotComputed(
                "Frequencies doesn't exist. Call computeFrequencies first.")

        if type_score == "all":
            scores_to_compute = self.freq.available_scores
        else:
            if type_score not in self.freq.available_scores:
                raise error.InvalidScore(f"{type_score} score is not valid")
            scores_to_compute = {
                type_score: self.freq.available_scores[type_score]}

        # Record the number of rescored poses only once every argument has been
        # validated, so a rejected call does not leave the object claiming that
        # poses were rescored.
        # NOTE(review): presumably self.rescored_poses depends on this counter,
        # hence it is set before the loop - confirm against the class definition.
        self._nb_rescored_poses = nb_poses

        for pose in self.rescored_poses:
            for score, score_info in scores_to_compute.items():
                pose.computeScore(score, *score_info)