Пример #1
0
def test_format_weights_valueerror(feature_weights):
    """
    Test that `format_weights` raises a ValueError for the given feature weights.

    Parameters
    ----------
    feature_weights : object
        Input handed unchanged to `format_weights`; expected to be rejected.
        NOTE(review): presumably supplied by a pytest parametrization or fixture that is
        not part of this snippet - confirm which invalid inputs are covered.
    """

    # The test passes only if the call inside the context raises ValueError
    with pytest.raises(ValueError):
        format_weights(feature_weights)
Пример #2
0
    def from_feature_distances(cls, feature_distances, feature_weights=None):
        """
        Build the fingerprint distance for one fingerprint pair from its feature distances.

        The per-feature distances and the per-feature bit coverages of the pair are each
        combined with the formatted feature weights into a single value.

        Parameters
        ----------
        feature_distances : kissim.similarity.FeatureDistances
            Distances and bit coverages between two fingerprints for each of their features,
            together with the structure and kinase pair identifiers.
        feature_weights : None or list of float
            Feature weights, formatted via `format_weights`. Accepted forms:
            (i) None
                Default feature weights: All features equally distributed to 1/15
                (15 features in total).
            (ii) By feature (list of 15 floats):
                Features to be set in the following order: size, hbd, hba, charge, aromatic,
                aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
                distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
                All floats must sum up to 1.0.

        Returns
        -------
        kissim.comparison.FingerprintDistance
            New instance of `cls` with `structure_pair_ids`, `kinase_pair_ids`, `distance`
            and `bit_coverage` set.
        """

        instance = cls()

        # Collect everything needed from the inputs
        formatted_weights = format_weights(feature_weights)
        pair_bit_coverages = feature_distances.bit_coverages
        pair_distances = feature_distances.distances

        # Carry over the identifiers of the compared pair
        instance.structure_pair_ids = feature_distances.structure_pair_ids
        instance.kinase_pair_ids = feature_distances.kinase_pair_ids

        # Weighted feature distances -> fingerprint distance
        instance.distance = instance._distance(pair_distances, formatted_weights)
        # Weighted feature bit coverages -> fingerprint bit coverage
        instance.bit_coverage = instance._bit_coverage(pair_bit_coverages, formatted_weights)

        return instance
Пример #3
0
def test_format_weights(feature_weights, feature_weights_formatted):
    """
    Test that `format_weights` output has the same spread as the expected weights.

    Only the standard deviations of the expected and the calculated weights are compared
    (relative tolerance 1e-04); individual weight values are not checked.

    Parameters
    ----------
    feature_weights : None or list of float
        Feature weights.
    feature_weights_formatted : list of float
        Formatted feature weights of length 15.
    """

    calculated_weights = format_weights(feature_weights)

    std_expected = np.std(feature_weights_formatted)
    std_calculated = np.std(calculated_weights)

    assert np.isclose(std_expected, std_calculated, rtol=1e-04)
Пример #4
0
    def from_feature_distances_generator(
        cls, feature_distances_generator, feature_weights=None
    ):
        """
        Generate fingerprint distances for multiple fingerprint pairs based on their feature
        distances, given a feature weighting scheme.

        The pairs are processed one after another; progress is reported via `tqdm` and
        start time, weights, output size and runtime are logged.

        Parameters
        ----------
        feature_distances_generator : kissim.similarity.FeatureDistancesGenerator
            Feature distances for multiple fingerprint pairs. Its `distances`,
            `bit_coverages`, `data` and `structure_kinase_ids` attributes are read.
        feature_weights : None or list of float
            Feature weights, formatted via `format_weights`. Accepted forms:
            (i) None
                Default feature weights: All features equally distributed to 1/15
                (15 features in total).
            (ii) By feature (list of 15 floats):
                Features to be set in the following order: size, hbd, hba, charge, aromatic,
                aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
                distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
                All floats must sum up to 1.0.

        Returns
        -------
        kissim.comparison.FingerprintDistanceGenerator
            New instance of `cls` whose `data` holds the pair identifier columns plus the
            `distance` and `bit_coverage` columns, and whose `structure_kinase_ids` is taken
            over from the input.
        """

        logger.info("GENERATE FINGERPRINT DISTANCES")

        start_time = datetime.datetime.now()
        logger.info(f"Fingerprint distance generation started at: {start_time}")

        # Bring the weights into their formatted form once, up front
        weights = format_weights(feature_weights)
        logger.info(f"Feature weights: {weights}")

        # A single helper instance provides the weighting methods for all pairs
        calculator = FingerprintDistance()

        distance_progress = tqdm(
            feature_distances_generator.distances,
            desc="Calculate pairwise fingerprint distance",
        )
        distances = [
            calculator._distance(pair_distances, weights)
            for pair_distances in distance_progress
        ]

        coverage_progress = tqdm(
            feature_distances_generator.bit_coverages,
            desc="Calculate pairwise fingerprint coverage",
        )
        bit_coverages = [
            calculator._bit_coverage(pair_bit_coverages, weights)
            for pair_bit_coverages in coverage_progress
        ]

        # Assemble the result: identifier columns first, then the computed values
        generator = cls()
        id_columns = ["structure.1", "structure.2", "kinase.1", "kinase.2"]
        # Copy so that adding columns does not touch the input's data
        generator.data = feature_distances_generator.data[id_columns].copy()
        generator.data["distance"] = distances
        generator.data["bit_coverage"] = bit_coverages
        generator.structure_kinase_ids = feature_distances_generator.structure_kinase_ids

        n_pairs = len(generator.data)
        logger.info(f"Number of output fingerprint distances: {n_pairs}")

        end_time = datetime.datetime.now()
        logger.info(f"Runtime: {end_time - start_time}")

        return generator