def run(self, result_path: str):
    """Build the result path, create the specification file and run ImmuneMLApp.

    Args:
        result_path: value handed to ``self.build_path``; the path that call
            returns is used for the specification file and, with
            ``"quickstart/"`` appended, as the app's result path.
    """
    base_path = self.build_path(result_path)
    specification_file = self.create_specfication(base_path)
    ImmuneMLApp(specification_file, base_path + "quickstart/").run()
def run_example(self, specs: dict, path: str):
    """Dump ``specs`` to ``<path>specs.yaml``, run ImmuneMLApp on it and delete ``path``.

    Args:
        specs: specification dictionary written out with ``yaml.dump``.
        path: working directory prefix; ``"result/"`` is appended to it for the
            app's result path and the whole directory is removed at the end.
    """
    PathBuilder.build(path)

    yaml_file = path + "specs.yaml"
    with open(yaml_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(yaml_file, path + "result/").run()

    shutil.rmtree(path)
def test_dataset_generation(self):
    """Run ImmuneMLApp on the specs from ``self.build_specs`` in a temporary folder."""
    workdir = PathBuilder.build(EnvironmentSettings.tmp_test_path + "cv_split_variant/")

    # The specs are built before the YAML file is opened, as in the original flow.
    specs = self.build_specs(workdir)
    yaml_file = workdir + "specs.yaml"
    with open(yaml_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(yaml_file, workdir + "result/").run()

    shutil.rmtree(workdir)
def test_subsampling(self):
    """Run ImmuneMLApp on the specs from ``self.build_specs`` with faulthandler enabled."""
    import faulthandler

    # Enabled before anything else so a hard crash in the run dumps a traceback.
    faulthandler.enable()

    workdir = PathBuilder.build(EnvironmentSettings.tmp_test_path + "subsampling_workflow/")

    specs = self.build_specs(workdir)
    yaml_file = workdir + "specs.yaml"
    with open(yaml_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(yaml_file, workdir + "result/").run()

    shutil.rmtree(workdir)
def run(self):
    """Run ImmuneMLApp once per split specification, then the reports, then the HTML build.

    Steps, in order:
      1. build ``self.result_path`` and split the specs file;
      2. for every entry in the split specs, run ImmuneMLApp with
         ``<result_path>/<specs_name>/`` as result path and keep the first
         element of what ``app.run()`` returns;
      3. pass the collected states to ``self._run_reports``;
      4. hand the report results and the per-dataset result paths to
         ``MultiDatasetBenchmarkHTMLBuilder.build``.

    Progress is printed (flushed) before and after each step.
    """
    print("Starting MultiDatasetBenchmarkTool...", flush=True)

    PathBuilder.build(self.result_path)
    specs = self._split_specs_file()
    self._extract_reports()

    dataset_total = len(list(specs.keys()))
    instruction_states = {}

    for position, specs_name in enumerate(specs.keys(), start=1):
        progress = f"({position}/{dataset_total}).."

        print(f"Running nested cross-validation on dataset {specs_name} {progress}", flush=True)

        app = ImmuneMLApp(specification_path=specs[specs_name],
                          result_path=f"{self.result_path}/{specs_name}/")
        instruction_states[specs_name] = app.run()[0]

        print(f"Finished nested cross-validation on dataset {specs_name} {progress}", flush=True)

    print("Running reports on the results of nested cross-validation on all datasets...", flush=True)
    report_results = self._run_reports(instruction_states)

    print("Finished reports, now generating HTML output...", flush=True)
    dataset_result_paths = {
        specs_name: f"{self.result_path}/{specs_name}/"
        for specs_name in specs.keys()
    }
    MultiDatasetBenchmarkHTMLBuilder.build(report_results, self.result_path, dataset_result_paths)

    print("MultiDatasetBenchmarkTool finished.", flush=True)
def _run(self):
    """Build the result path, update the specs and return what ImmuneMLApp's run returns."""
    PathBuilder.build(self.result_path)

    # self.yaml_path is read only after update_specs() has been called.
    self.update_specs()

    return ImmuneMLApp(self.yaml_path, self.result_path).run()
def test(self):
    """Run a DatasetExport instruction on a random repertoire dataset with HTML output."""
    workdir = PathBuilder.build(EnvironmentSettings.tmp_test_path + "integration_dataset_gen_html/")
    dataset_path = f"{workdir}initial_dataset/"

    random_dataset = {
        "format": "RandomRepertoireDataset",
        "params": {
            "repertoire_count": 10,
            "sequence_count_probabilities": {10: 1},
            "sequence_length_probabilities": {12: 1},
            "labels": {},
            "result_path": dataset_path,
        },
    }

    export_instruction = {
        "type": "DatasetExport",
        "export_formats": ["Pickle", "AIRR"],
        "datasets": ["d1"],
    }

    specs = {
        "definitions": {"datasets": {"d1": random_dataset}},
        "instructions": {"instr1": export_instruction},
        "output": {"format": "HTML"},
    }

    yaml_file = f"{workdir}specs.yaml"
    with open(yaml_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(yaml_file, workdir + "result/").run()

    shutil.rmtree(workdir)
def _run(self):
    """Obtain the YAML path from ``main(self.args)`` and return what ImmuneMLApp's run returns."""
    # main() is called first; the result folder is built only afterwards.
    specification_path = main(self.args)
    PathBuilder.build(self.result_path)

    return ImmuneMLApp(specification_path, self.result_path).run()
def test_ml(self):
    """Run an export workflow and then an import workflow, checking each produces ``index.html``.

    The first run uses the specs from ``self.prepare_specs`` and writes to
    ``result_export/``; the second uses ``self.prepare_import_specs`` and
    writes to ``result_import/``. The temporary folder is removed at the end.
    """
    path = PathBuilder.build(EnvironmentSettings.tmp_test_path + "integration_ml/")
    export_dir = path + "result_export/"
    import_dir = path + 'result_import/'

    specs_path = self.prepare_specs(path)
    PathBuilder.build(export_dir)

    # The return values of app.run() were never used, so they are no longer bound.
    ImmuneMLApp(specification_path=specs_path, result_path=export_dir).run()
    self.assertTrue(os.path.isfile(export_dir + "index.html"))

    specs_path = self.prepare_import_specs(path)

    ImmuneMLApp(specs_path, import_dir).run()
    self.assertTrue(os.path.isfile(import_dir + "index.html"))

    shutil.rmtree(path)
def _run(self):
    """Run ImmuneMLApp and copy the zipped optimal models into ``exported_models/``.

    After the run, every file matching
    ``<result_path>/<instruction_name>/optimal_*/zip/*.zip`` is copied, under
    its own base name, into ``<result_path>/exported_models/``.

    Paths are assembled with ``os.path.join`` so the method behaves the same
    whether or not ``self.result_path`` ends with a path separator; plain
    string concatenation needed a trailing separator for the export folder but
    not for the glob pattern.
    """
    PathBuilder.build(self.result_path)
    self._prepare_specs()

    app = ImmuneMLApp(self.yaml_path, self.result_path)
    app.run()

    # glob() already returns a list, so no extra list() wrapper is needed.
    model_locations = glob(
        os.path.join(self.result_path, self.instruction_name, "optimal_*", "zip", "*.zip"))

    # The trailing separator is kept so the built path looks like the one the original passed on.
    model_export_path = PathBuilder.build(os.path.join(self.result_path, 'exported_models/'))

    for model_location in model_locations:
        shutil.copyfile(
            model_location,
            os.path.join(model_export_path, os.path.basename(model_location)))

    logging.info(
        f"{GalaxyTrainMLModel.__name__}: immuneML has finished and the trained models were exported."
    )
def test_simulation(self):
    """Run the simulation specs and check the metadata, HTML index and exported dataset."""
    path = EnvironmentSettings.tmp_test_path + "integration_simulation/"
    self.prepare_dataset(path)
    specs_path = self.prepare_specs(path)

    result_dir = path + "result/"
    PathBuilder.build(result_dir)

    ImmuneMLApp(specification_path=specs_path, result_path=result_dir).run()

    metadata_file = result_dir + "inst1/metadata.csv"
    self.assertTrue(os.path.isfile(metadata_file))

    metadata_df = pd.read_csv(metadata_file, comment=Constants.COMMENT_SIGN)
    self.assertTrue("signal_signal1" in metadata_df.columns)
    self.assertEqual(17, sum(metadata_df["signal_signal1"]))

    self.assertTrue(os.path.isfile(result_dir + "index.html"))
    self.assertTrue(
        os.path.isfile(result_dir + "inst1/exported_dataset/pickle/d1.iml_dataset"))

    shutil.rmtree(path)
def test_generate(self):
    """Train a KNN on SequenceAbundance-encoded data and check the report's figures and tables.

    Builds 14 repertoires alternating between two sequence sets (labels
    alternate True/False), exports them as a Pickle dataset, runs a
    TrainMLModel instruction with the DiseaseAssociatedSequenceCVOverlap
    report and asserts that the single report result lists existing figure
    and table files.
    """
    path = EnvironmentSettings.tmp_test_path + "disease_assoc_seq_cv/"
    PathBuilder.build(path)

    sequence_patterns = (["GGG", "III", "LLL", "MMM"], ["DDD", "EEE", "FFF"])
    repetitions = 7

    # A fresh list is created per repertoire so no two entries share an object.
    repertoire_sequences = [list(pattern) for _ in range(repetitions) for pattern in sequence_patterns]
    label_values = [True, False] * repetitions

    repertoires, metadata = RepertoireBuilder.build(repertoire_sequences,
                                                    labels={"l1": label_values},
                                                    path=path)

    dataset = RepertoireDataset(repertoires=repertoires,
                                metadata_file=metadata,
                                params={"l1": [True, False]})
    PickleExporter.export(dataset, path)

    definitions = {
        "datasets": {
            "d1": {
                "format": "Pickle",
                "params": {
                    "path": f"{path}{dataset.name}.iml_dataset",
                },
            },
        },
        "encodings": {
            "e1": {"SequenceAbundance": {'p_value_threshold': 0.5}},
        },
        "ml_methods": {
            "knn": {"KNN": {"n_neighbors": 1}},
        },
        "reports": {
            "r1": {
                "DiseaseAssociatedSequenceCVOverlap": {
                    "compare_in_selection": True,
                    "compare_in_assessment": True,
                },
            },
        },
    }

    train_instruction = {
        "type": "TrainMLModel",
        "settings": [{"encoding": "e1", "ml_method": "knn"}],
        "assessment": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 0.5,
            "reports": {},
        },
        "selection": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 0.5,
        },
        "labels": [{"l1": {"positive_class": True}}],
        "dataset": "d1",
        "strategy": "GridSearch",
        "metrics": ["accuracy"],
        "number_of_processes": 2,
        "reports": ["r1"],
        "optimization_metric": "balanced_accuracy",
        "refit_optimal_model": True,
        "store_encoded_data": False,
    }

    specs = {
        "definitions": definitions,
        "instructions": {"inst1": train_instruction},
    }

    specs_file = path + "specs.yaml"
    with open(specs_file, "w") as handle:
        yaml.dump(specs, handle)

    state = ImmuneMLApp(specs_file, path + "result/").run()[0]

    self.assertEqual(1, len(state.report_results))

    report_result = state.report_results[0]
    self.assertTrue(len(report_result.output_figures) > 0)
    self.assertTrue(len(report_result.output_tables) > 0)

    for figure in report_result.output_figures:
        self.assertTrue(os.path.isfile(figure.path))
    for table in report_result.output_tables:
        self.assertTrue(os.path.isfile(table.path))

    shutil.rmtree(path)
def test_encoding(self):
    """Run TrainMLModel with SequenceAbundance encoding and the ReferenceSequenceOverlap report.

    Writes a four-row reference CSV, builds 16 repertoires cycling through
    four sequence sets (labels cycle True, True, False, False), exports them
    as a Pickle dataset and runs the specs through ImmuneMLApp. The temporary
    folder is removed at the end.
    """
    path = EnvironmentSettings.tmp_test_path + "integration_test_emerson_encoding/"
    PathBuilder.build(path)

    ref_path = path + "reference.csv"
    reference = pd.DataFrame({
        "sequence_aas": ["GGG", "III", "TTT", "EFEF"],
        "v_alleles": ["TRBV6-1*01"] * 4,
        'j_alleles': ["TRBJ2-7"] * 4,
    })
    reference.to_csv(ref_path, index=False)

    sequence_patterns = (
        ["GGG", "III", "LLL", "MMM"],
        ["DDD", "EEE", "FFF", "III", "LLL", "MMM"],
        ["CCC", "FFF", "MMM"],
        ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"],
    )
    repetitions = 4

    # A fresh list is created per repertoire so no two entries share an object.
    repertoire_sequences = [list(pattern) for _ in range(repetitions) for pattern in sequence_patterns]
    label_values = [True, True, False, False] * repetitions

    repertoires, metadata = RepertoireBuilder.build(repertoire_sequences,
                                                    labels={"l1": label_values},
                                                    path=path)

    dataset = RepertoireDataset(repertoires=repertoires,
                                metadata_file=metadata,
                                params={"l1": [True, False]})
    PickleExporter.export(dataset, path)

    # Kept as a tuple and copied with list() at each use: two distinct list
    # objects keep yaml.dump from emitting an anchor/alias pair in the file.
    comparison_attributes = ("sequence_aas", "v_alleles", "j_alleles")

    definitions = {
        "datasets": {
            "d1": {
                "format": "Pickle",
                "params": {
                    "path": f"{path}{dataset.name}.iml_dataset",
                },
            },
        },
        "encodings": {
            "e1": {
                "SequenceAbundance": {
                    'comparison_attributes': list(comparison_attributes),
                },
            },
        },
        "ml_methods": {
            "knn": {"KNN": {"n_neighbors": 1}},
        },
        "reports": {
            "r1": {
                "ReferenceSequenceOverlap": {
                    "reference_path": ref_path,
                    'comparison_attributes': list(comparison_attributes),
                },
            },
        },
    }

    train_instruction = {
        "type": "TrainMLModel",
        "settings": [{"encoding": "e1", "ml_method": "knn"}],
        "assessment": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 0.7,
            "reports": {},
        },
        "selection": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 0.7,
        },
        "labels": [{"l1": {"positive_class": True}}],
        "dataset": "d1",
        "strategy": "GridSearch",
        "metrics": ["accuracy"],
        "number_of_processes": 2,
        "reports": ["r1"],
        "optimization_metric": "balanced_accuracy",
        "refit_optimal_model": True,
        "store_encoded_data": False,
    }

    specs = {
        "definitions": definitions,
        "instructions": {"inst1": train_instruction},
    }

    specs_file = path + "specs.yaml"
    with open(specs_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(specs_file, path + "result/").run()

    shutil.rmtree(path)
def run_tool(yaml_path, result_path):
    """Build ``result_path`` and run ImmuneMLApp on ``yaml_path`` with it as result path."""
    PathBuilder.build(result_path)
    ImmuneMLApp(yaml_path, result_path).run()
def test_simulation_receptors(self):
    """Run a Simulation instruction on a random receptor dataset and check the export.

    After the run the test asserts that ``index.html`` and the exported
    Pickle dataset exist, that the imported dataset has 100 examples and that
    all 100 carry ``"signal_signal1"`` in their metadata.
    """
    path = PathBuilder.build(EnvironmentSettings.tmp_test_path + "integration_simulation_receptor/")

    datasets = {
        "d1": {
            "format": "RandomReceptorDataset",
            "params": {
                "receptor_count": 100,
                "chain_1_length_probabilities": {10: 1},
                "chain_2_length_probabilities": {10: 1},
                "result_path": path + "dataset/",
                "labels": {},
            },
        },
    }

    motifs = {
        "motif1": {
            "seed_chain1": "CC/C",
            "name_chain1": "ALPHA",
            "name_chain2": "BETA",
            "seed_chain2": "F/FF",
            "instantiation": {
                "GappedKmer": {
                    "max_gap": 1,
                    "alphabet_weights": None,
                    "position_weights": None,
                },
            },
        },
        "motif2": {
            "seed_chain1": "CCC",
            "name_chain1": "ALPHA",
            "name_chain2": "BETA",
            "seed_chain2": "FFF",
            "instantiation": "GappedKmer",
        },
    }

    signals = {
        "signal1": {
            "motifs": ["motif1", "motif2"],
            "implanting": "Receptor",
            "sequence_position_weights": None,
        },
    }

    simulations = {
        "sim1": {
            "var1": {
                "signals": ["signal1"],
                "dataset_implanting_rate": 0.5,
            },
            "var2": {
                "signals": ["signal1"],
                "dataset_implanting_rate": 0.5,
                "is_noise": True,
            },
        },
    }

    specs = {
        "definitions": {
            "datasets": datasets,
            "motifs": motifs,
            "signals": signals,
            "simulations": simulations,
        },
        "instructions": {
            "inst1": {
                "type": "Simulation",
                "dataset": "d1",
                "simulation": "sim1",
                "export_formats": ["Pickle"],
            },
        },
        "output": {"format": "HTML"},
    }

    specs_file = path + "specs.yaml"
    with open(specs_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(specs_file, path + "result/").run()

    exported_dataset_file = path + "result/inst1/exported_dataset/pickle/d1.iml_dataset"
    self.assertTrue(os.path.isfile(path + "result/index.html"))
    self.assertTrue(os.path.isfile(exported_dataset_file))

    dataset = PickleImport.import_dataset({"path": exported_dataset_file}, "d1")
    self.assertEqual(100, dataset.get_example_count())

    receptors_with_signal = [
        receptor for receptor in dataset.get_data()
        if "signal_signal1" in receptor.metadata
    ]
    self.assertEqual(100, len(receptors_with_signal))

    shutil.rmtree(path)
def test(self):
    """Run TrainMLModel with OneHot encoding and ReceptorCNN on a random receptor dataset."""
    path = PathBuilder.build(EnvironmentSettings.tmp_test_path + "integration_receptor_cnn_workflow/")

    definitions = {
        "datasets": {
            "d1": {
                "format": "RandomReceptorDataset",
                "params": {
                    "result_path": path + "generated_dataset/",
                    "receptor_count": 500,
                    "chain_1_length_probabilities": {5: 1.},
                    "chain_2_length_probabilities": {6: 1.},
                    "labels": {
                        "cmv_epitope": {True: 0.5, False: 0.5},
                    },
                },
            },
        },
        "encodings": {
            "enc1": {"OneHot": {"use_positional_info": True}},
        },
        "ml_methods": {
            "cnn": {
                "ReceptorCNN": {
                    "iteration_count": 1000,
                    "evaluate_at": 10,
                    "batch_size": 100,
                    "number_of_threads": 4,
                },
            },
        },
    }

    train_instruction = {
        "type": "TrainMLModel",
        "settings": [{"encoding": "enc1", "ml_method": "cnn"}],
        "assessment": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 0.7,
        },
        "selection": {
            "split_strategy": "random",
            "split_count": 1,
            "training_percentage": 1,
        },
        "labels": ["cmv_epitope"],
        "dataset": "d1",
        "strategy": "GridSearch",
        "metrics": ["accuracy"],
        "number_of_processes": 4,
        "reports": None,
        "optimization_metric": "balanced_accuracy",
        "refit_optimal_model": False,
        "store_encoded_data": False,
    }

    specs = {
        "definitions": definitions,
        "instructions": {"instr1": train_instruction},
    }

    specs_file = path + "specs.yaml"
    with open(specs_file, "w") as handle:
        yaml.dump(specs, handle)

    ImmuneMLApp(specs_file, path + 'result/').run()

    shutil.rmtree(path)