Exemplo n.º 1
0
    def run(self):
        """Run nested cross-validation on every dataset and build the HTML summary.

        The specs returned by `_split_specs_file` map a name to a
        specification path. One ImmuneMLApp is run per entry, writing to
        `<result_path>/<name>/`, and the first element of each run's return
        value is kept. Those are passed to `_run_reports`, whose output goes
        to MultiDatasetBenchmarkHTMLBuilder together with the per-dataset
        result paths.
        """
        print("Starting MultiDatasetBenchmarkTool...", flush=True)
        PathBuilder.build(self.result_path)
        specs = self._split_specs_file()
        self._extract_reports()

        dataset_names = list(specs.keys())
        total = len(dataset_names)
        instruction_states = {}

        for position, name in enumerate(dataset_names, start=1):
            print(f"Running nested cross-validation on dataset {name} ({position}/{total})..", flush=True)
            dataset_app = ImmuneMLApp(
                specification_path=specs[name],
                result_path=f"{self.result_path}/{name}/",
            )
            # only the first element returned by the run is kept for this dataset
            instruction_states[name] = dataset_app.run()[0]
            print(f"Finished nested cross-validation on dataset {name} ({position}/{total})..", flush=True)

        print("Running reports on the results of nested cross-validation on all datasets...", flush=True)
        report_results = self._run_reports(instruction_states)

        print("Finished reports, now generating HTML output...", flush=True)
        dataset_result_paths = {name: f"{self.result_path}/{name}/" for name in dataset_names}
        MultiDatasetBenchmarkHTMLBuilder.build(report_results, self.result_path, dataset_result_paths)
        print("MultiDatasetBenchmarkTool finished.", flush=True)
Exemplo n.º 2
0
    def run(self, result_path: str):
        """Create a specification file and run immuneML on it.

        Args:
            result_path: location passed to `build_path`. The value that
                call returns is used to create the specification file and,
                with "quickstart/" appended, as the result path of the run.
        """
        base_path = self.build_path(result_path)
        specification_path = self.create_specfication(base_path)

        ImmuneMLApp(specification_path, base_path + "quickstart/").run()
Exemplo n.º 3
0
    def _run(self):
        """Build the result directory, update the specs and run immuneML.

        Returns:
            The value returned by `ImmuneMLApp.run()` for `self.yaml_path`.
        """
        PathBuilder.build(self.result_path)
        self.update_specs()

        return ImmuneMLApp(self.yaml_path, self.result_path).run()
    def _run(self):
        """Obtain the YAML path from `main`, build the result directory and run immuneML.

        Returns:
            The value returned by `ImmuneMLApp.run()` for that YAML path.
        """
        specification_path = main(self.args)

        PathBuilder.build(self.result_path)

        return ImmuneMLApp(specification_path, self.result_path).run()
    def run_example(self, specs: dict, path: str):
        """Dump *specs* to YAML under *path*, run immuneML on it and delete *path*.

        Args:
            specs: specification written to `<path>specs.yaml`.
            path: working directory; file names are appended to it directly,
                so it is expected to end with a path separator. It is removed
                when the method finishes.
        """
        PathBuilder.build(path)

        try:
            specs_filename = f"{path}specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_filename, path + "result/")
            app.run()
        finally:
            # remove the working directory even when the run raises, so a failed
            # example does not leave stale files behind for the next run
            shutil.rmtree(path)
Exemplo n.º 6
0
    def test_dataset_generation(self):
        """Run immuneML on the specs produced by `build_specs` in a temporary directory."""
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path +
                                 "cv_split_variant/")

        try:
            repertoire_specs = self.build_specs(path)

            specs_filename = f"{path}specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(repertoire_specs, file)

            app = ImmuneMLApp(specs_filename, path + "result/")
            app.run()
        finally:
            # clean up even if the run fails so later test runs start from an empty directory
            shutil.rmtree(path)
Exemplo n.º 7
0
    def test_subsampling(self):
        """Run immuneML on the specs produced by `build_specs` with faulthandler enabled."""
        import faulthandler
        # dump Python tracebacks if the interpreter crashes during the run
        faulthandler.enable()

        path = PathBuilder.build(EnvironmentSettings.tmp_test_path + "subsampling_workflow/")

        try:
            repertoire_specs = self.build_specs(path)

            specs_filename = f"{path}specs.yaml"
            with open(specs_filename, "w") as file:
                yaml.dump(repertoire_specs, file)

            app = ImmuneMLApp(specs_filename, path + "result/")
            app.run()
        finally:
            # clean up even if the run fails so later test runs start from an empty directory
            shutil.rmtree(path)
Exemplo n.º 8
0
    def test(self):
        """Run a DatasetExport instruction (Pickle and AIRR) on a random repertoire dataset with HTML output."""
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path +
                                 "integration_dataset_gen_html/")

        try:
            dataset_path = f"{path}initial_dataset/"

            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "RandomRepertoireDataset",
                            "params": {
                                "repertoire_count": 10,
                                "sequence_count_probabilities": {
                                    10: 1
                                },
                                "sequence_length_probabilities": {
                                    12: 1
                                },
                                "labels": {},
                                "result_path": dataset_path
                            }
                        }
                    }
                },
                "instructions": {
                    "instr1": {
                        "type": "DatasetExport",
                        "export_formats": ["Pickle", "AIRR"],
                        "datasets": ["d1"]
                    }
                },
                "output": {
                    "format": "HTML"
                }
            }

            specs_path = f"{path}specs.yaml"
            with open(specs_path, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_path, path + "result/")
            app.run()
        finally:
            # clean up even if the run fails so later test runs start from an empty directory
            shutil.rmtree(path)
Exemplo n.º 9
0
    def _run(self):
        """Run immuneML and gather the produced model archives in one folder.

        After the run, every file matching
        `<result_path>/<instruction_name>/optimal_*/zip/*.zip` is copied into
        the directory built from `<result_path>exported_models/`, keeping its
        base name.
        """
        PathBuilder.build(self.result_path)
        self._prepare_specs()
        ImmuneMLApp(self.yaml_path, self.result_path).run()

        # collect the archives before the export directory is created
        zip_pattern = self.result_path + f"/{self.instruction_name}/optimal_*/zip/*.zip"
        zipped_models = list(glob(zip_pattern))

        export_dir = PathBuilder.build(self.result_path + 'exported_models/')

        for zipped_model in zipped_models:
            target = export_dir + os.path.basename(zipped_model)
            shutil.copyfile(zipped_model, target)

        logging.info(
            f"{GalaxyTrainMLModel.__name__}: immuneML has finished and the trained models were exported."
        )
Exemplo n.º 10
0
    def test_simulation(self):
        """Run the simulation workflow and check the metadata, HTML index and exported dataset."""
        path = EnvironmentSettings.tmp_test_path + "integration_simulation/"
        result_path = path + "result/"
        metadata_path = result_path + "inst1/metadata.csv"

        try:
            self.prepare_dataset(path)
            specs_path = self.prepare_specs(path)

            PathBuilder.build(result_path)

            app = ImmuneMLApp(specification_path=specs_path,
                              result_path=result_path)
            app.run()

            self.assertTrue(os.path.isfile(metadata_path))

            metadata_df = pd.read_csv(metadata_path,
                                      comment=Constants.COMMENT_SIGN)
            # assertIn reports the available columns when the signal column is missing
            self.assertIn("signal_signal1", metadata_df.columns)
            self.assertEqual(17, sum(metadata_df["signal_signal1"]))

            self.assertTrue(os.path.isfile(result_path + "index.html"))
            self.assertTrue(
                os.path.isfile(
                    result_path + "inst1/exported_dataset/pickle/d1.iml_dataset"))
        finally:
            # clean up even if the run or an assertion fails; the directory may
            # not exist yet if the preparation step itself raised
            if os.path.isdir(path):
                shutil.rmtree(path)
Exemplo n.º 11
0
    def test_ml(self):
        """Run the export specs and then the import specs, checking that each run writes index.html."""
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path + "integration_ml/")

        try:
            specs_path = self.prepare_specs(path)

            PathBuilder.build(path + "result_export/")

            # the return values of the runs are not inspected; only the produced files are checked
            ImmuneMLApp(specification_path=specs_path, result_path=path + "result_export/").run()

            self.assertTrue(os.path.isfile(path + "result_export/index.html"))

            import_specs_path = self.prepare_import_specs(path)

            ImmuneMLApp(import_specs_path, path + 'result_import/').run()

            self.assertTrue(os.path.isfile(path + "result_import/index.html"))
        finally:
            # clean up even if a run or an assertion fails so later test runs start clean
            shutil.rmtree(path)
    def test_encoding(self):
        """Run TrainMLModel with SequenceAbundance encoding, KNN and a ReferenceSequenceOverlap report.

        A 16-repertoire dataset is built, exported with PickleExporter and
        referenced from the specification, which is then run through
        ImmuneMLApp.
        """
        path = EnvironmentSettings.tmp_test_path + "integration_test_emerson_encoding/"
        PathBuilder.build(path)

        try:
            ref_path = path + "reference.csv"
            pd.DataFrame({
                "sequence_aas": ["GGG", "III", "TTT", "EFEF"],
                "v_alleles":
                ["TRBV6-1*01", "TRBV6-1*01", "TRBV6-1*01", "TRBV6-1*01"],
                'j_alleles': ["TRBJ2-7", "TRBJ2-7", "TRBJ2-7", "TRBJ2-7"]
            }).to_csv(ref_path, index=False)

            # the same four repertoires (two labelled True, two labelled False)
            # are repeated four times; fresh list copies are made for each repeat
            repertoire_pattern = [
                ["GGG", "III", "LLL", "MMM"],
                ["DDD", "EEE", "FFF", "III", "LLL", "MMM"],
                ["CCC", "FFF", "MMM"],
                ["AAA", "CCC", "EEE", "FFF", "LLL", "MMM"],
            ]
            repeat_count = 4
            sequences = [list(repertoire) for _ in range(repeat_count)
                         for repertoire in repertoire_pattern]
            label_values = [True, True, False, False] * repeat_count

            repertoires, metadata = RepertoireBuilder.build(
                sequences, labels={"l1": label_values}, path=path)

            dataset = RepertoireDataset(repertoires=repertoires,
                                        metadata_file=metadata,
                                        params={"l1": [True, False]})
            PickleExporter.export(dataset, path)

            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "Pickle",
                            "params": {
                                "path": path + f"{dataset.name}.iml_dataset",
                            }
                        }
                    },
                    "encodings": {
                        "e1": {
                            "SequenceAbundance": {
                                'comparison_attributes':
                                ["sequence_aas", "v_alleles", "j_alleles"]
                            }
                        }
                    },
                    "ml_methods": {
                        "knn": {
                            "KNN": {
                                "n_neighbors": 1
                            },
                        }
                    },
                    "reports": {
                        "r1": {
                            "ReferenceSequenceOverlap": {
                                "reference_path":
                                ref_path,
                                'comparison_attributes':
                                ["sequence_aas", "v_alleles", "j_alleles"]
                            }
                        }
                    }
                },
                "instructions": {
                    "inst1": {
                        "type": "TrainMLModel",
                        "settings": [{
                            "encoding": "e1",
                            "ml_method": "knn"
                        }],
                        "assessment": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.7,
                            "reports": {}
                        },
                        "selection": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.7,
                        },
                        "labels": [{
                            "l1": {
                                "positive_class": True
                            }
                        }],
                        "dataset": "d1",
                        "strategy": "GridSearch",
                        "metrics": ["accuracy"],
                        "number_of_processes": 2,
                        "reports": ["r1"],
                        "optimization_metric": "balanced_accuracy",
                        "refit_optimal_model": True,
                        "store_encoded_data": False
                    }
                }
            }

            specs_file = path + "specs.yaml"
            with open(specs_file, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_file, path + "result/")
            app.run()
        finally:
            # clean up even if the run fails so later test runs start from an empty directory
            shutil.rmtree(path)
Exemplo n.º 13
0
    def test(self):
        """Run TrainMLModel with OneHot encoding and ReceptorCNN on a random receptor dataset."""
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path +
                                 "integration_receptor_cnn_workflow/")

        try:
            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "RandomReceptorDataset",
                            "params": {
                                "result_path": path + "generated_dataset/",
                                "receptor_count": 500,
                                "chain_1_length_probabilities": {
                                    5: 1.
                                },
                                "chain_2_length_probabilities": {
                                    6: 1.
                                },
                                "labels": {
                                    "cmv_epitope": {
                                        True: 0.5,
                                        False: 0.5
                                    }
                                }
                            }
                        }
                    },
                    "encodings": {
                        "enc1": {
                            "OneHot": {
                                "use_positional_info": True
                            }
                        }
                    },
                    "ml_methods": {
                        "cnn": {
                            "ReceptorCNN": {
                                "iteration_count": 1000,
                                "evaluate_at": 10,
                                "batch_size": 100,
                                "number_of_threads": 4
                            }
                        }
                    }
                },
                "instructions": {
                    "instr1": {
                        "type": "TrainMLModel",
                        "settings": [{
                            "encoding": "enc1",
                            "ml_method": "cnn"
                        }],
                        "assessment": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.7,
                        },
                        "selection": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 1,
                        },
                        "labels": ["cmv_epitope"],
                        "dataset": "d1",
                        "strategy": "GridSearch",
                        "metrics": ["accuracy"],
                        "number_of_processes": 4,
                        "reports": None,
                        "optimization_metric": "balanced_accuracy",
                        "refit_optimal_model": False,
                        "store_encoded_data": False
                    }
                }
            }

            # build the specification path once and reuse it for writing and running
            specs_path = path + "specs.yaml"
            with open(specs_path, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_path, path + 'result/')
            app.run()
        finally:
            # clean up even if the run fails so later test runs start from an empty directory
            shutil.rmtree(path)
Exemplo n.º 14
0
 def run_tool(yaml_path, result_path):
     """Build *result_path* and run immuneML on the specification at *yaml_path*."""
     PathBuilder.build(result_path)
     ImmuneMLApp(yaml_path, result_path).run()
Exemplo n.º 15
0
    def test_simulation_receptors(self):
        """Run a Simulation instruction on a random receptor dataset and check the exported dataset.

        The exported Pickle dataset is imported again and is expected to hold
        100 receptors, each with a "signal_signal1" entry in its metadata.
        """
        path = PathBuilder.build(EnvironmentSettings.tmp_test_path +
                                 "integration_simulation_receptor/")

        try:
            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "RandomReceptorDataset",
                            "params": {
                                "receptor_count": 100,
                                "chain_1_length_probabilities": {
                                    10: 1
                                },
                                "chain_2_length_probabilities": {
                                    10: 1
                                },
                                "result_path": path + "dataset/",
                                "labels": {}
                            }
                        },
                    },
                    "motifs": {
                        "motif1": {
                            "seed_chain1": "CC/C",
                            "name_chain1": "ALPHA",
                            "name_chain2": "BETA",
                            "seed_chain2": "F/FF",
                            "instantiation": {
                                "GappedKmer": {
                                    "max_gap": 1,
                                    "alphabet_weights": None,
                                    "position_weights": None
                                },
                            }
                        },
                        "motif2": {
                            "seed_chain1": "CCC",
                            "name_chain1": "ALPHA",
                            "name_chain2": "BETA",
                            "seed_chain2": "FFF",
                            "instantiation": "GappedKmer"
                        }
                    },
                    "signals": {
                        "signal1": {
                            "motifs": ["motif1", "motif2"],
                            "implanting": "Receptor",
                            "sequence_position_weights": None
                        }
                    },
                    "simulations": {
                        "sim1": {
                            "var1": {
                                "signals": ["signal1"],
                                "dataset_implanting_rate": 0.5
                            },
                            "var2": {
                                "signals": ["signal1"],
                                "dataset_implanting_rate": 0.5,
                                "is_noise": True
                            }
                        }
                    }
                },
                "instructions": {
                    "inst1": {
                        "type": "Simulation",
                        "dataset": "d1",
                        "simulation": "sim1",
                        "export_formats": ["Pickle"]
                    }
                },
                "output": {
                    "format": "HTML"
                }
            }

            specs_path = path + "specs.yaml"
            with open(specs_path, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_path, path + "result/")
            app.run()

            exported_dataset_path = path + "result/inst1/exported_dataset/pickle/d1.iml_dataset"

            self.assertTrue(os.path.isfile(path + "result/index.html"))
            self.assertTrue(os.path.isfile(exported_dataset_path))
            dataset = PickleImport.import_dataset({"path": exported_dataset_path}, "d1")

            self.assertEqual(100, dataset.get_example_count())
            # count matching receptors without materialising an intermediate list
            receptors_with_signal = sum(
                1 for receptor in dataset.get_data()
                if "signal_signal1" in receptor.metadata)
            self.assertEqual(100, receptors_with_signal)
        finally:
            # clean up even if the run or an assertion fails so later test runs start clean
            shutil.rmtree(path)
Exemplo n.º 16
0
    def test_generate(self):
        """Run TrainMLModel with a DiseaseAssociatedSequenceCVOverlap report and check its outputs.

        The first element returned by the run must hold exactly one report
        result with at least one figure and one table, and every listed
        figure and table path must exist on disk.
        """
        path = EnvironmentSettings.tmp_test_path + "disease_assoc_seq_cv/"
        PathBuilder.build(path)

        try:
            # the same pair of repertoires (labelled True, False) is repeated
            # seven times; fresh list copies are made for each repeat
            repertoire_pattern = [["GGG", "III", "LLL", "MMM"],
                                  ["DDD", "EEE", "FFF"]]
            repeat_count = 7
            sequences = [list(repertoire) for _ in range(repeat_count)
                         for repertoire in repertoire_pattern]
            label_values = [True, False] * repeat_count

            repertoires, metadata = RepertoireBuilder.build(
                sequences, labels={"l1": label_values}, path=path)

            dataset = RepertoireDataset(repertoires=repertoires,
                                        metadata_file=metadata,
                                        params={"l1": [True, False]})
            PickleExporter.export(dataset, path)

            specs = {
                "definitions": {
                    "datasets": {
                        "d1": {
                            "format": "Pickle",
                            "params": {
                                "path": path + f"{dataset.name}.iml_dataset",
                            }
                        }
                    },
                    "encodings": {
                        "e1": {
                            "SequenceAbundance": {
                                'p_value_threshold': 0.5
                            }
                        }
                    },
                    "ml_methods": {
                        "knn": {
                            "KNN": {
                                "n_neighbors": 1
                            },
                        }
                    },
                    "reports": {
                        "r1": {
                            "DiseaseAssociatedSequenceCVOverlap": {
                                "compare_in_selection": True,
                                "compare_in_assessment": True
                            }
                        }
                    }
                },
                "instructions": {
                    "inst1": {
                        "type": "TrainMLModel",
                        "settings": [{
                            "encoding": "e1",
                            "ml_method": "knn"
                        }],
                        "assessment": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.5,
                            "reports": {}
                        },
                        "selection": {
                            "split_strategy": "random",
                            "split_count": 1,
                            "training_percentage": 0.5,
                        },
                        "labels": [{
                            "l1": {
                                "positive_class": True
                            }
                        }],
                        "dataset": "d1",
                        "strategy": "GridSearch",
                        "metrics": ["accuracy"],
                        "number_of_processes": 2,
                        "reports": ["r1"],
                        "optimization_metric": "balanced_accuracy",
                        "refit_optimal_model": True,
                        "store_encoded_data": False
                    }
                }
            }

            specs_file = path + "specs.yaml"
            with open(specs_file, "w") as file:
                yaml.dump(specs, file)

            app = ImmuneMLApp(specs_file, path + "result/")
            state = app.run()[0]

            self.assertEqual(1, len(state.report_results))
            report_result = state.report_results[0]
            # assertGreater shows the actual count on failure
            self.assertGreater(len(report_result.output_figures), 0)
            self.assertGreater(len(report_result.output_tables), 0)

            for fig in report_result.output_figures:
                self.assertTrue(os.path.isfile(fig.path))
            for table in report_result.output_tables:
                self.assertTrue(os.path.isfile(table.path))
        finally:
            # clean up even if the run or an assertion fails so later test runs start clean
            shutil.rmtree(path)
Exemplo n.º 17
0
 def _run(self):
     """Run immuneML and copy the first exported path into `<result_path>result/`.

     The source directory is the first value of the first value in the
     `paths` mapping of the first element returned by the run.
     """
     PathBuilder.build(self.result_path)
     self.update_specs()

     states = ImmuneMLApp(self.yaml_path, self.result_path).run()
     state = states[0]

     # state.paths is treated as a two-level mapping; take the first entry at each level
     first_group = list(state.paths.values())[0]
     exported_path = list(first_group.values())[0]

     shutil.copytree(exported_path, self.result_path + "result/")
     print("Exported dataset.")