def test_top_clusters_criterion_flag(analysis, cluster_selection, expected_value):
    """
    Runs meanshift clustering with a given top clusters criterion and checks
    that the cluster labelled "A" in info.csv carries the expected metric value.

    Parameters
    ----------
    analysis : Analysis object
        Created automatically by a fixture.
    cluster_selection : str
        Value passed as top_clusters_criterion; also the key used to look up
        the metric column name in cs.metric_top_clusters_criterion.
    expected_value : float
        Metric value expected to be associated with the selected cluster A.
    """
    output_folder = "cluster_selection_test"

    analysis.generate_clusters(
        path=output_folder,
        clustering_type="meanshift",
        bandwidth=2.5,
        analysis_nclust=10,
        max_top_clusters=1,
        top_clusters_criterion=cluster_selection,
        min_population=0.01,
    )

    info = pd.read_csv(os.path.join(output_folder, "info.csv"))
    rows_labelled_a = info.index[info["Selected labels"] == "A"]
    metric_column = cs.metric_top_clusters_criterion[cluster_selection]
    matching_values = info[metric_column].iloc[rows_labelled_a].tolist()

    # Single-element unpacking raises unless exactly one row is labelled "A".
    (top_value,) = matching_values

    assert top_value == expected_value
    check_remove_folder(output_folder)
def test_plotter(x, y, z):
    """
    Checks that the scatter and KDE plots are written to disk.

    Parameters
    ----------
    x :
        First metric, passed to both plotting calls.
    y :
        Second metric, passed to both plotting calls.
    z :
        Third metric, passed to the scatter plot only.
    """
    output_folder = "tmp/plots"
    check_remove_folder(output_folder)

    reports = DataHandler(
        sim_path=simulation_path,
        report_name=REPORT_NAME,
        trajectory_name=TRAJ_NAME,
        be_column=5,
    ).get_reports_dataframe()
    plotter = Plotter(reports)

    scatter_path = plotter.plot_two_metrics(x, y, z, output_folder=output_folder)
    kde_path = plotter.plot_kde(x, y, output_folder=output_folder, kde_structs=10)

    # Both plotting calls return a value that is checked as a filesystem path.
    assert os.path.exists(scatter_path)
    assert os.path.exists(kde_path)
def test_generate_clusters(analysis, method, bandwidth, n_clusters):
    """
    Checks that the built-in clustering methods write the expected number of
    cluster PDB files.

    Parameters
    ----------
    analysis : Analysis object
        Created by a fixture.
    method : str
        Built-in clustering method, e.g. "dbscan".
    bandwidth : float
        Bandwidth for meanshift (or epsilon for DBSCAN).
    n_clusters : int
        Number of clusters for the Gaussian mixture model; also the number of
        non-water PDB files expected in the output folder.
    """
    working_folder = "clustering_method"

    analysis.generate_clusters(
        working_folder,
        method,
        bandwidth=bandwidth,
        analysis_nclust=n_clusters,
    )

    # Paths containing "water" are left out of the count.
    pdb_pattern = os.path.join(working_folder, "*pdb")
    non_water_pdbs = [path for path in glob.glob(pdb_pattern) if "water" not in path]

    assert len(non_water_pdbs) == n_clusters
    check_remove_folder(working_folder)
def test_analysis_production(yaml_file, expected_poses, expected_clusters):
    """
    Runs production analysis from input.yaml, both for PDB and XTC trajectories,
    and checks the generated poses, clusters and parameters file.

    NOTE(review): this file contains a later function with the same name; in
    Python the later definition rebinds the name - confirm which one is meant
    to be collected.

    Parameters
    ----------
    yaml_file : str
        Path to input.yaml
    expected_poses : int
        Expected number of PDB files under results/top_poses.
    expected_clusters : int
        Expected number of PDB files under results/clusters.
    """
    job_params = main.run_platform_from_yaml(yaml_file)
    results_folder = os.path.join(job_params.pele_dir, "results")

    n_top_poses = len(glob.glob(os.path.join(results_folder, "top_poses/*pdb")))
    n_clusters = len(glob.glob(os.path.join(results_folder, "clusters/*pdb")))
    params_file = os.path.join(results_folder, "parameters.txt")

    assert n_top_poses == expected_poses
    assert n_clusters == expected_clusters
    assert os.path.isfile(params_file)

    with open(params_file, "r") as handle:
        parameters_text = handle.read()
    assert "clustering_type: meanshift" in parameters_text

    # Clean up
    check_remove_folder(results_folder)
def test_analysis_flags(yaml_file, n_expected_outputs, expected_files):
    """
    Runs full simulation with input.yaml with some unusual flags, then checks
    the number of top poses, the created plots and their names to ensure the
    correct metrics were taken into account.

    Parameters
    ----------
    yaml_file : str
        Path to input.yaml
    n_expected_outputs : int
        Number of expected plots.
    expected_files : List[str]
        List of expected plot names.
    """
    output_folder = "../pele_platform/Examples/analysis/data/results"
    plots_folder = os.path.join(output_folder, "plots")
    top_poses_pattern = os.path.join(output_folder, "top_poses", "*pdb")

    main.run_platform_from_yaml(yaml_file)

    # Every expected plot name has to exist in the plots folder
    for plot_name in expected_files:
        assert os.path.exists(os.path.join(plots_folder, plot_name))

    # The total number of PNG plots must match
    png_plots = glob.glob(os.path.join(plots_folder, "*png"))
    assert len(png_plots) == n_expected_outputs

    # No top poses are expected
    top_pose_files = glob.glob(top_poses_pattern)
    assert len(top_pose_files) == 0

    check_remove_folder(output_folder)
def test_cluster_representatives_criterion_flag(analysis, criterion, expected):
    """
    Tests the user-defined method of selecting cluster representatives by
    checking the columns and completeness of top_selections.csv.

    Parameters
    ----------
    analysis : Analysis object
        Created by a fixture.
    criterion : str
        cluster_representatives_criterion flag defined by the user.
    expected : str
        Expected value in the dataframe. Currently not used by the test body.

    TODO: Manually check expected values and then add them to the test to make sure we're getting the right stuff!
    """
    output_folder = "cluster_rep_selection"
    required_columns = [
        "Cluster",
        "Cluster label",
        "epoch",
        "trajectory",
        "Step",
        "currentEnergy",
        "Binding Energy",
        "sasaLig",
    ]

    analysis.generate_clusters(
        path=output_folder,
        clustering_type="meanshift",
        bandwidth=2.5,
        max_top_clusters=1,
        representatives_criterion=criterion,
    )

    selections = pd.read_csv(os.path.join(output_folder, "top_selections.csv"))

    assert all(column in selections.columns for column in required_columns)
    # The CSV must not contain any missing values.
    assert not selections.isnull().values.any()

    check_remove_folder(output_folder)
def test_working_folder(output="site_finder"):
    """
    Tests custom working folder.

    Parameters
    ----------
    output : str
        Folder removed before the run, default "site_finder".
    """
    helpers.check_remove_folder(output)

    yaml_file = os.path.join(test_path, "site_finder/input_folder.yaml")
    # The platform call returns a pair here; only the first element is inspected.
    job, _ = main.run_platform_from_yaml(yaml_file)

    assert os.path.exists(job.folder)
def test_api_analysis_generation(analysis):
    """
    Runs full analysis workflow (with GMM clustering) and checks the report,
    plots, top poses, clusters, representatives CSV and data.csv.

    Parameters
    ----------
    analysis : Analysis object
        Created by a fixture.
    """
    working_folder = "full_analysis"
    check_remove_folder(working_folder)

    n_clusts = 3
    analysis.generate(working_folder, "gaussianmixture", analysis_nclust=n_clusts)

    # Report
    summary_pdf = os.path.join(working_folder, "summary.pdf")
    assert os.path.exists(summary_pdf)

    # Plots
    plot_files = glob.glob(os.path.join(working_folder, "plots", "*png"))
    assert len(plot_files) == 2

    # Top poses
    pose_files = glob.glob(os.path.join(working_folder, "top_poses", "*pdb"))
    assert len(pose_files) == 7

    # Clusters: includes water and ligand clusters, so n_clusts x 2
    cluster_files = glob.glob(os.path.join(working_folder, "clusters", "*pdb"))
    assert len(cluster_files) == 6

    # Cluster representatives CSV must expose columns from both trajectory and metrics dfs
    required_columns = (
        "Cluster label",
        "epoch",
        "trajectory",
        "Step",
        "currentEnergy",
        "Binding Energy",
        "sasaLig",
    )
    selections = pd.read_csv(
        os.path.join(working_folder, "clusters", "top_selections.csv")
    )
    assert all(column in selections.columns for column in required_columns)

    # data.csv must exist, have 8 lines and start with the expected header
    data_csv = os.path.join(working_folder, "data.csv")
    assert os.path.exists(data_csv)

    with open(data_csv, "r") as handle:
        rows = handle.readlines()

    expected_header = (
        "Step,numberOfAcceptedPeleSteps,currentEnergy,Binding Energy,sasaLig,epoch,trajectory,"
        "Cluster\n"
    )
    assert len(rows) == 8
    assert rows[0] == expected_header

    check_remove_folder(working_folder)
def test_water_clustering(path, topology):
    """
    Tests full water clustering on both XTC and PDB trajectories.

    Parameters
    ----------
    path :
        Forwarded to get_analysis as its first argument.
    topology :
        Forwarded to get_analysis; when truthy the trajectory type is "xtc",
        otherwise "pdb".
    """
    analysis_output = "water_clustering"

    if topology:
        traj = "xtc"
    else:
        traj = "pdb"

    water_analysis = get_analysis(path, topology, traj)
    water_analysis.generate_clusters(path=analysis_output, clustering_type="meanshift")

    # TODO: Write a proper test for water clustering output once it's implemented.
    check_remove_folder(analysis_output)
def test_check_existing_directory(generate_folders):
    """
    Checks that Analysis._check_existing_directory returns "results_3" when
    asked for "results", then removes every folder matching "results*".

    Parameters
    ----------
    generate_folders : pytest.fixture
        Pytest fixture that generates "results" folders for testing
    """
    suggested_path = Analysis._check_existing_directory("results")
    assert suggested_path == "results_3"

    leftover_folders = glob.glob("results*")
    check_remove_folder(*leftover_folders)
def test_inner_clustering(analysis, multi, expected):
    """
    Checks if inner clustering is performed correctly.

    Parameters
    ----------
    analysis : Analysis object
        Created by a fixture.
    multi :
        Value formatted into the "multi <value>" representatives criterion.
    expected : int
        Expected number of non-water PDB files in the output folder.
    """
    working_folder = "inner_clustering"
    criterion = "multi {}".format(multi)

    analysis.generate_clusters(
        working_folder,
        "meanshift",
        bandwidth=30,
        representatives_criterion=criterion,
    )

    # Paths containing "water" are left out of the count.
    pdb_files = glob.glob(os.path.join(working_folder, "*pdb"))
    non_water_pdbs = [path for path in pdb_files if "water" not in path]

    assert len(non_water_pdbs) == expected
    check_remove_folder(working_folder)
def test_analysis_production(yaml_file, expected_poses, expected_clusters):
    """
    Runs production analysis from input.yaml, both for PDB and XTC trajectories,
    and checks how many top poses and clusters were written.

    NOTE(review): this file contains an earlier function with the same name;
    this later definition rebinds it - confirm which one is meant to be
    collected.

    Parameters
    ----------
    yaml_file : str
        Path to input.yaml
    expected_poses : int
        Expected number of PDB files under results/top_poses.
    expected_clusters : int
        Expected number of PDB files under results/clusters.
    """
    job_params = main.run_platform_from_yaml(yaml_file)
    results_folder = os.path.join(job_params.pele_dir, "results")

    n_top_poses = len(glob.glob(os.path.join(results_folder, "top_poses/*pdb")))
    n_clusters = len(glob.glob(os.path.join(results_folder, "clusters/*pdb")))

    assert n_top_poses == expected_poses
    assert n_clusters == expected_clusters

    # Clean up
    check_remove_folder(results_folder)
def test_generate_top_poses(analysis, n_poses, expected_energies):
    """
    Checks that generate_top_poses returns the requested number of values,
    that the expected energies are among them, and that the same number of
    PDB files is written.

    Parameters
    ----------
    analysis : Analysis object
        Created by a fixture.
    n_poses : int
        Number of top poses requested and expected back.
    expected_energies : iterable of float
        Energies (rounded to 3 decimals) that must appear in the returned values.
    """
    output_folder = "tmp/top_poses"
    check_remove_folder(output_folder)

    returned_values = analysis.generate_top_poses(output_folder, n_poses)
    rounded_values = [round(value, 3) for value in returned_values]

    # Correct number of values and expected energies present
    assert len(returned_values) == n_poses
    for expected_energy in expected_energies:
        assert expected_energy in rounded_values

    # Correct number of files saved
    saved_names = []
    for pdb_path in glob.glob(os.path.join(output_folder, "*pdb")):
        saved_names.append(os.path.basename(pdb_path))
    assert len(saved_names) == n_poses
def test_extract_poses(analysis):
    """
    Tests poses extraction from dataframe.

    The values returned by analysis._extract_poses are compared, order
    independently, against expected_energies (a name defined outside this
    function), and the number of PDB files written is checked.

    Parameters
    ----------
    analysis : Analysis object
        Created in analysis fixture.
    """
    output = "extracted_poses"
    check_remove_folder(output)

    values = analysis._extract_poses(analysis._dataframe, "currentEnergy", output)
    poses = glob.glob(os.path.join(output, "*pdb"))

    # list.sort() sorts in place and returns None, so comparing two .sort()
    # results is always "None == None". sorted() returns new lists, which
    # actually compares the contents and leaves both inputs unmodified.
    assert sorted(values) == sorted(expected_energies)
    assert len(poses) == 7

    check_remove_folder(output)