def test_format_weights_valueerror(feature_weights):
    """
    Test if invalid input feature weights raise a ValueError in `format_weights`.

    Parameters
    ----------
    feature_weights : object
        Invalid feature weights input that `format_weights` is expected to reject.
        NOTE(review): presumably supplied by a parametrize decorator that is not
        visible in this chunk - confirm.
    """
    # The test passes only if the call inside the context raises ValueError.
    with pytest.raises(ValueError):
        format_weights(feature_weights)
def from_feature_distances(cls, feature_distances, feature_weights=None):
    """
    Build a fingerprint distance from the feature distances of one fingerprint pair.

    Parameters
    ----------
    feature_distances : kissim.similarity.FeatureDistances
        Distances and bit coverages between two fingerprints for each of their features.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    kissim.comparison.FingerprintDistance
        Fingerprint distance.
    """
    instance = cls()

    # Bring the weights into their formatted form, then read the per-feature values
    formatted_weights = format_weights(feature_weights)
    per_feature_coverages = feature_distances.bit_coverages
    per_feature_distances = feature_distances.distances

    # Carry over the identifiers of the compared pair
    instance.structure_pair_ids = feature_distances.structure_pair_ids
    instance.kinase_pair_ids = feature_distances.kinase_pair_ids

    # Weighted sum of feature distances
    instance.distance = instance._distance(per_feature_distances, formatted_weights)

    # Weighted sum of feature bit coverages
    instance.bit_coverage = instance._bit_coverage(per_feature_coverages, formatted_weights)

    return instance
def test_format_weights(feature_weights, feature_weights_formatted):
    """
    Test if input feature weights are formatted as expected by `format_weights`.

    Parameters
    ----------
    feature_weights : None or list of float
        Feature weights.
    feature_weights_formatted : list of float
        Formatted feature weights of length 15.
    """
    calculated_weights = format_weights(feature_weights)

    # NOTE: only the standard deviations of the expected and the calculated weights
    # are compared here, not the individual weight values.
    expected_std = np.std(feature_weights_formatted)
    calculated_std = np.std(calculated_weights)

    assert np.isclose(expected_std, calculated_std, rtol=1e-04)
def from_feature_distances_generator(cls, feature_distances_generator, feature_weights=None):
    """
    Generate fingerprint distances for multiple fingerprint pairs based on their feature
    distances, given a feature weighting scheme.

    Parameters
    ----------
    feature_distances_generator : kissim.similarity.FeatureDistancesGenerator
        Feature distances for multiple fingerprint pairs.
    feature_weights : None or list of float
        Feature weights of the following form:
        (i) None
            Default feature weights: All features equally distributed to 1/15
            (15 features in total).
        (ii) By feature (list of 15 floats):
            Features to be set in the following order: size, hbd, hba, charge, aromatic,
            aliphatic, sco, exposure, distance_to_centroid, distance_to_hinge_region,
            distance_to_dfg_region, distance_to_front_pocket, moment1, moment2, and moment3.
            All floats must sum up to 1.0.

    Returns
    -------
    kissim.comparison.FingerprintDistanceGenerator
        Fingerprint distance generator.
    """
    logger.info("GENERATE FINGERPRINT DISTANCES")

    start_time = datetime.datetime.now()
    logger.info(f"Fingerprint distance generation started at: {start_time}")

    # Format input feature weights
    feature_weights = format_weights(feature_weights)
    logger.info(f"Feature weights: {feature_weights}")

    # A single FingerprintDistance instance is reused to apply the weights to every pair
    calculator = FingerprintDistance()

    # One fingerprint distance per pair, processed one pair after the other
    pairwise_distances = [
        calculator._distance(pair_distances, feature_weights)
        for pair_distances in tqdm(
            feature_distances_generator.distances,
            desc="Calculate pairwise fingerprint distance",
        )
    ]

    # One fingerprint bit coverage per pair, processed one pair after the other
    pairwise_coverages = [
        calculator._bit_coverage(pair_coverages, feature_weights)
        for pair_coverages in tqdm(
            feature_distances_generator.bit_coverages,
            desc="Calculate pairwise fingerprint coverage",
        )
    ]

    # Set class attributes: copy the pair identifier columns, then attach the results
    fingerprint_distance_generator = cls()
    pair_id_columns = ["structure.1", "structure.2", "kinase.1", "kinase.2"]
    fingerprint_distance_generator.data = feature_distances_generator.data[
        pair_id_columns
    ].copy()
    fingerprint_distance_generator.data["distance"] = pairwise_distances
    fingerprint_distance_generator.data["bit_coverage"] = pairwise_coverages
    fingerprint_distance_generator.structure_kinase_ids = (
        feature_distances_generator.structure_kinase_ids
    )

    logger.info(
        f"Number of output fingerprint distances: {len(fingerprint_distance_generator.data)}"
    )

    end_time = datetime.datetime.now()
    logger.info(f"Runtime: {end_time - start_time}")

    return fingerprint_distance_generator