def test_preprocess_expansion(write_yaml, shared_datadir, add_cli_arguments):
    """Run ``expansion_main`` on the ``dummy`` prefix and verify the output files.

    Checks the number of lines in the template library and in each of the
    training/testing/validation CSV files, then inspects the table stored in
    the unique-templates HDF5 file (row count and required columns).
    """
    settings = {
        "file_prefix": str(shared_datadir / "dummy"),
        "split_size": {"training": 0.6, "testing": 0.2, "validation": 0.2},
    }
    config_path = write_yaml(settings)
    add_cli_arguments(config_path)

    expansion_main()

    # File name -> number of lines the file must contain after the run.
    expected_line_counts = (
        ("dummy_template_library.csv", 10),
        ("dummy_training.csv", 6),
        ("dummy_testing.csv", 2),
        ("dummy_validation.csv", 2),
    )
    for filename, expected in expected_line_counts:
        with open(shared_datadir / filename, "r") as fileobj:
            line_count = len(fileobj.read().splitlines())
        assert line_count == expected

    templates = pd.read_hdf(shared_datadir / "dummy_unique_templates.hdf5", "table")
    config = Config(config_path)

    assert len(templates) == 2
    for fixed_column in ("retro_template", "library_occurence"):
        assert fixed_column in templates.columns
    # Every metadata header named by the configuration must also be a column.
    for column in config["metadata_headers"]:
        assert column in templates.columns
def test_preprocess_expansion_bad_product(
    write_yaml, shared_datadir, add_cli_arguments
):
    """Expect ``expansion_main`` to raise ``MoleculeException`` for ``dummy_sani``.

    The configuration does not set ``remove_unsanitizable_products``, so the
    run is expected to fail rather than complete.
    """
    prefix = str(shared_datadir / "dummy_sani")
    split_fractions = {"training": 0.6, "testing": 0.2, "validation": 0.2}
    config_path = write_yaml({"file_prefix": prefix, "split_size": split_fractions})
    add_cli_arguments(config_path)

    with pytest.raises(MoleculeException):
        expansion_main()
def test_preprocess_expansion_skip_bad_product(
    write_yaml, shared_datadir, add_cli_arguments
):
    """Run ``expansion_main`` on ``dummy_sani`` with unsanitizable products removed.

    With ``remove_unsanitizable_products`` enabled the run must complete, and
    the template library file must contain ten lines afterwards.
    """
    settings = {
        "file_prefix": str(shared_datadir / "dummy_sani"),
        "split_size": {"training": 0.6, "testing": 0.2, "validation": 0.2},
        "remove_unsanitizable_products": True,
    }
    config_path = write_yaml(settings)
    add_cli_arguments(config_path)

    expansion_main()

    library_path = shared_datadir / "dummy_sani_template_library.csv"
    with open(library_path, "r") as fileobj:
        library_lines = fileobj.read().splitlines()
    assert len(library_lines) == 10
def test_preprocess_recommender(write_yaml, shared_datadir, add_cli_arguments):
    """Verify that ``recommender_main`` recreates the split and template files.

    ``expansion_main`` is run first and the template library is checked. The
    split CSV files and the unique-templates HDF5 file are then deleted, so
    the checks after ``recommender_main`` can only pass if that call writes
    them again.
    """
    settings = {
        "file_prefix": str(shared_datadir / "dummy"),
        "split_size": {"training": 0.6, "testing": 0.2, "validation": 0.2},
    }
    config_path = write_yaml(settings)
    add_cli_arguments(config_path)

    expansion_main()

    with open(shared_datadir / "dummy_template_library.csv", "r") as fileobj:
        library_line_count = len(fileobj.read().splitlines())
    assert library_line_count == 10

    # Split file name -> number of lines expected once it has been recreated.
    split_line_counts = (
        ("dummy_training.csv", 6),
        ("dummy_testing.csv", 2),
        ("dummy_validation.csv", 2),
    )
    templates_path = shared_datadir / "dummy_unique_templates.hdf5"

    # Delete the derived files so stale copies cannot satisfy the checks below.
    for filename, _ in split_line_counts:
        os.remove(shared_datadir / filename)
    os.remove(templates_path)

    recommender_main()

    for filename, expected in split_line_counts:
        with open(shared_datadir / filename, "r") as fileobj:
            line_count = len(fileobj.read().splitlines())
        assert line_count == expected

    templates = pd.read_hdf(templates_path, "table")
    assert len(templates) == 2