示例#1
0
def compare_from_cli(args):
    """
    Run the fingerprint comparison with settings taken from the command line.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed CLI arguments. The attributes `input`, `output`, `weights` and `ncores` are read.
    """

    # The logger is configured with a "distances.log" path below the output location
    log_path = Path(args.output) / "distances.log"
    configure_logger(log_path)

    # Load the fingerprints from the JSON input and hand everything over to `compare`
    fingerprints = FingerprintGenerator.from_json(args.input)
    compare(fingerprints, args.output, args.weights, args.ncores)
示例#2
0
def outliers(fingerprints_path,
             distance_cutoff,
             fingerprints_wo_outliers_path=None):
    """
    Drop every fingerprint that has at least one distance above a cutoff.

    Parameters
    ----------
    fingerprints_path : str or pathlib.Path
        Path to the fingerprints JSON file to be loaded.
    distance_cutoff : float
        Largest tolerated distance; a fingerprint is discarded as soon as any of its distances
        is greater than this value.
    fingerprints_wo_outliers_path : None or str or pathlib.Path
        If given, the remaining fingerprints are written to this JSON file.

    Returns
    -------
    kissim.encoding.FingerprintGenerator
        The loaded fingerprint generator with the outlier fingerprints deleted from its data.
    """

    def _exceeds_cutoff(fingerprint):
        # Two chained `any()` reductions collapse the element-wise comparison to one flag
        # (presumably `distances` is a two-dimensional table - confirm against Fingerprint)
        return (fingerprint.distances > distance_cutoff).any().any()

    # Step 1: load the fingerprints
    logger.info("Read fingerprints...")
    json_path = Path(fingerprints_path)
    generator = FingerprintGenerator.from_json(json_path)
    logger.info(f"Number of fingerprints: {len(generator.data)}")

    # Step 2: collect the IDs of all structures that violate the cutoff
    cutoff_message = (
        "Use the following distance minimum/maximum cutoffs"
        f" to identify outlier structures: {distance_cutoff}"
    )
    logger.info(cutoff_message)
    outlier_ids = [
        structure_id
        for structure_id, fingerprint in generator.data.items()
        if _exceeds_cutoff(fingerprint)
    ]
    logger.info(f"Structure IDs to be removed: {outlier_ids}")

    # Step 3: delete them (done after the scan so the data is not changed while iterating)
    logger.info("Remove fingerprints with distance outliers...")
    for outlier_id in outlier_ids:
        del generator.data[outlier_id]
    logger.info(f"Number of fingerprints: {len(generator.data)}")

    # Step 4 (optional): write the cleaned fingerprints to disk
    if fingerprints_wo_outliers_path is not None:
        # The path is logged exactly as passed in, before it is converted to a Path object
        logger.info(
            f"Save cleaned fingerprints to {fingerprints_wo_outliers_path}...")
        generator.to_json(Path(fingerprints_wo_outliers_path))

    return generator
示例#3
0
def test_main_encode(args):
    """
    Run the encoding CLI in a subprocess and check the files it leaves behind.

    Parameters
    ----------
    args : str
        Full command line; it is split on whitespace before being passed to the subprocess.
    """

    json_path = Path("fingerprints.json")
    log_path = Path(f"{json_path.stem}.log")
    command = args.split()

    with enter_temp_directory():
        # A non-zero exit status of the command raises and thereby fails the test
        subprocess.run(command, check=True)

        # Both the fingerprints JSON file and the accompanying log file must exist
        assert json_path.exists()
        assert log_path.exists()

        # The JSON file must load into a generator whose first entry is a Fingerprint
        loaded = FingerprintGenerator.from_json(json_path)
        assert isinstance(loaded, FingerprintGenerator)
        first_entry = list(loaded.data.values())[0]
        assert isinstance(first_entry, Fingerprint)
    def test_to_from_json(self, structure_klifs_ids, normalize,
                          values_array_sum):
        """
        Check that fingerprints survive a round trip through a JSON file.

        The fingerprints are generated from `structure_klifs_ids`, saved to a JSON file in a
        temporary directory and loaded again (passing `normalize` to the loader). The reloaded
        object must have the same data keys, its normalized data must be present or None
        depending on `normalize`, and the NaN-ignoring sum over all fingerprint value arrays
        must match `values_array_sum` within an absolute tolerance of 1e-4.
        """

        json_filepath = Path("fingerprints.json")
        original = FingerprintGenerator.from_structure_klifs_ids(
            structure_klifs_ids, LOCAL, 1)

        with enter_temp_directory():

            # Write the fingerprints and make sure the file was created
            original.to_json(json_filepath)
            assert json_filepath.exists()

            # Read them back in
            reloaded = FingerprintGenerator.from_json(json_filepath, normalize)

        assert isinstance(reloaded, FingerprintGenerator)

        # Same structures, in the same order
        original_ids = list(original.data.keys())
        reloaded_ids = list(reloaded.data.keys())
        assert original_ids == reloaded_ids

        # Normalized data only exists if normalization was requested on loading
        if not normalize:
            assert reloaded.data_normalized is None
        else:
            original_normalized_ids = list(original.data_normalized.keys())
            reloaded_normalized_ids = list(reloaded.data_normalized.keys())
            assert original_normalized_ids == reloaded_normalized_ids

        # Sum up all fingerprint values while ignoring NaN entries
        values_array_sum_calculated = sum(
            np.nansum(fingerprint.values_array(True, True, True))
            for fingerprint in reloaded.data.values()
        )
        assert pytest.approx(values_array_sum_calculated,
                             abs=1e-4) == values_array_sum