Пример #1
0
    def run(self):
        """Run predict.R for every train-type / test-type combination of the ensemble.

        The feature basename is derived from the settings of the first
        experiment listed in the ensemble's "exper_ids"; the prediction and
        model basenames are derived from ``self.ensemble_id``.

        Raises:
            ValueError: if any predict.R invocation exits with a nonzero code.
        """
        ensemble = pf.values_from_conf(self.conf, "ensemble")
        ensemble = ensemble[self.ensemble_id]
        exper = pf.values_from_conf(self.conf, "experiment")
        ensemble_properties = exper[ensemble["exper_ids"][0]]

        # Get paths to features on which to generate predictions
        specifiers_list = [
            ensemble_properties["preprocessing"],
            ensemble_properties["validation_prop"],
            ensemble_properties["k_folds"],
            ensemble_properties["features"]
        ]

        x_basename = pf.output_name(
            self.conf,
            specifiers_list,
            "features_",
            "features"
        )

        pred_basename = pf.output_name(
            self.conf,
            self.ensemble_id,
            "ensemble_preds_",
            "preds"
        )

        model_basename = pf.output_name(
            self.conf,
            self.ensemble_id,
            "ensemble_model-",
            "models"
        )

        for train_type in ["cv", "full"]:
            for test_type in ["all", "test-all-cv"]:
                return_code = subprocess.call(
                    [
                        "Rscript",
                        pf.rscript_file(self.conf, "predict.R"),
                        x_basename + "-" + test_type + ".feather",
                        model_basename + "-all-" + train_type + "_trained.RData",
                        pred_basename + "-" + train_type + "_trained-" + test_type + ".feather"
                    ]
                )

                # Check each invocation: testing only after the inner loop
                # would let a failure on the first test type go unnoticed.
                if return_code != 0:
                    raise ValueError("predict.R failed")
    def output(self):
        """Return the local target for the "cv_"-prefixed ``.feather`` file.

        The file name is built by ``pf.output_name`` from the preprocessing
        configuration, the validation proportion and the number of folds.
        """
        specifiers = [self.preprocess_conf, self.validation_prop, self.k_folds]
        basename = pf.output_name(self.conf, specifiers, "cv_")
        return luigi.LocalTarget(basename + ".feather")
Пример #3
0
    def run(self):
        """Run predict.R for the current fold.

        Passes the feature file, the model file and the prediction output
        path to the script, in that order.

        Raises:
            ValueError: if predict.R exits with a nonzero code.
        """
        model_specifiers = [
            self.preprocess_conf,
            self.validation_prop,
            self.k_folds,
            self.features_conf,
            self.model_conf
        ]
        # Feature files do not depend on the model configuration.
        feature_specifiers = model_specifiers[:4]
        fold = str(self.cur_fold)

        x_path = pf.output_name(
            self.conf, feature_specifiers, "features_", "features"
        )
        x_path = x_path + "-test-" + fold + ".feather"
        if fold == "all":
            # For the "all" fold the "test-" marker is stripped from the path.
            x_path = x_path.replace("test-", "")

        pred_path = pf.output_name(
            self.conf, model_specifiers, "preds_", "preds"
        )
        pred_path = pred_path + "-" + fold + ".feather"

        model_path = pf.output_name(
            self.conf, model_specifiers, "model_", "models"
        )
        model_path = model_path + "-" + fold + ".RData"

        command = [
            "Rscript",
            pf.rscript_file(self.conf, "predict.R"),
            x_path,
            model_path,
            pred_path
        ]

        if subprocess.call(command) != 0:
            raise ValueError("predict.R failed")
Пример #4
0
    def run(self):
        """Run ensemble.R over the results of the experiments in this ensemble.

        Collects the prediction and model basenames of every configured
        experiment whose id appears in the ensemble's "exper_ids", and passes
        them to ensemble.R as ";"-terminated lists together with the response
        basename, the fold count, the output prefix, the configured ensemble
        path and the ensemble id.

        Raises:
            ValueError: if no configured experiment belongs to the ensemble,
                or if ensemble.R exits with a nonzero code.
        """
        ensemble = pf.values_from_conf(self.conf, "ensemble")
        ensemble = ensemble[self.ensemble_id]

        exper = pf.values_from_conf(self.conf, "experiment")
        preds_names = []
        models_names = []
        y_basename = None
        k_folds = None

        # Get paths to the experiment results needed for ensembling
        for i in exper:
            if i not in ensemble["exper_ids"]:
                continue

            specifiers_list = [
                exper[i]["preprocessing"], exper[i]["validation_prop"],
                exper[i]["k_folds"], exper[i]["features"], exper[i]["model"]
            ]

            preds_names.append(
                pf.output_name(self.conf, specifiers_list, "preds_", "preds"))
            models_names.append(
                pf.output_name(self.conf, specifiers_list, "model_", "models"))

            # These are assumed constant over experiments, so safe to overwrite
            y_basename = pf.output_name(self.conf, specifiers_list[:3],
                                        "responses_", "responses")
            k_folds = exper[i]["k_folds"]

        # Without a matching experiment there is no response basename or fold
        # count to pass on; fail with a clear message instead of a NameError.
        if not preds_names:
            raise ValueError(
                "no experiments found for ensemble " + str(self.ensemble_id))

        # Every entry is followed by ";", including the last one.
        preds_basenames = "".join(name + ";" for name in preds_names)
        models_basenames = "".join(name + ";" for name in models_names)

        # Now call the ensemble script
        output_prefix = pf.output_name(self.conf, self.ensemble_id,
                                       "ensemble_model-", "models")

        return_code = subprocess.call([
            "Rscript",
            pf.rscript_file(self.conf, "ensemble.R"), preds_basenames,
            models_basenames, y_basename,
            str(k_folds), output_prefix + "-all",
            self.conf.get("paths", "ensemble"), self.ensemble_id
        ])

        if return_code != 0:
            raise ValueError("ensemble.R failed")
Пример #5
0
    def output(self):
        """Return one local target per train-type / test-type combination.

        Each file is named
        ``<basename>-<train_type>_trained-<test_type>.feather``, where the
        basename comes from ``pf.output_name`` with the "ensemble_eval_"
        prefix.
        """
        basename = pf.output_name(self.conf, self.ensemble_id,
                                  "ensemble_eval_", "eval")

        return [
            luigi.LocalTarget(
                basename + "-" + train_type + "_trained-" + test_type +
                ".feather"
            )
            for train_type in ("cv", "full")
            for test_type in ("all", "test-all-cv")
        ]
Пример #6
0
    def output(self):
        """Return the local target for the current fold's ``.RData`` model file."""
        specifiers = [
            self.preprocess_conf,
            self.validation_prop,
            self.k_folds,
            self.features_conf,
            self.model_conf,
        ]
        basename = pf.output_name(self.conf, specifiers, "model_", "models")
        fold_suffix = "-" + str(self.cur_fold) + ".RData"

        return luigi.LocalTarget(basename + fold_suffix)
Пример #7
0
    def run(self):
        """Run train.R for the current fold and record the resulting model file.

        train.R receives the feature path, the response path, the model
        output path and the model configuration, in that order. On success a
        comma-separated line of the specifiers plus the model file's basename
        is appended to ``models/models.txt`` (resolved via
        ``pf.processed_data_dir``).

        Raises:
            ValueError: if train.R exits with a nonzero code.
        """
        # NOTE(review): the specifiers are joined with "," below, so they are
        # assumed to be strings -- confirm against the task parameters.
        specifiers_list = [
            self.preprocess_conf, self.validation_prop, self.k_folds,
            self.features_conf, self.model_conf
        ]

        x_path = pf.output_name(self.conf, specifiers_list[:4], "features_",
                                "features") + "-train-" + str(
                                    self.cur_fold) + ".feather"

        y_path = pf.output_name(self.conf, specifiers_list[:3], "responses_",
                                "responses") + "-train-" + str(
                                    self.cur_fold) + ".feather"

        # For the "all" fold the "-train" marker is stripped from both paths.
        if str(self.cur_fold) == "all":
            x_path = x_path.replace("-train", "")
            y_path = y_path.replace("-train", "")

        result_path = pf.output_name(self.conf, specifiers_list, "model_",
                                     "models") + "-" + str(
                                         self.cur_fold) + ".RData"

        return_code = subprocess.call([
            "Rscript",
            pf.rscript_file(self.conf, "train.R"), x_path, y_path, result_path,
            self.model_conf
        ])

        if return_code != 0:
            raise ValueError("train.R failed")

        mapping = pf.processed_data_dir(self.conf.get("paths", "project_dir"),
                                        os.path.join("models", "models.txt"))

        # The with-block closes the file; no explicit close() is needed.
        with open(mapping, "a") as f:
            f.write(",".join(specifiers_list +
                             [os.path.basename(result_path)]) + "\n")
    def run(self):
        """Run train_test_split.R on this task's input file.

        The script receives the input file's name, the "cv_"-prefixed
        ``.feather`` output path, the validation proportion and the number
        of folds, in that order.

        Raises:
            ValueError: if train_test_split.R exits with a nonzero code.
        """
        specifiers_list = [
            self.preprocess_conf, self.validation_prop, self.k_folds
        ]

        # The input is opened only to obtain its file name; close the handle
        # right away instead of leaking it.
        with self.input().open("r") as input_file:
            input_path = input_file.name

        return_code = subprocess.call([
            "Rscript",
            pf.rscript_file(self.conf, "train_test_split.R"),
            input_path,
            pf.output_name(self.conf, specifiers_list, "cv_") + ".feather",
            self.validation_prop, self.k_folds
        ])

        if return_code != 0:
            # Name the script that actually ran.
            raise ValueError("train_test_split.R failed")
Пример #9
0
    def output(self):
        """Return the ensemble prediction targets, one per train/test combination.

        Each file is named
        ``<basename>-<train_type>_trained-<test_type>.feather``, where the
        basename comes from ``pf.output_name`` with the "ensemble_preds_"
        prefix.
        """
        basename = pf.output_name(
            self.conf, self.ensemble_id, "ensemble_preds_", "preds"
        )

        targets = []
        for train_type in ("cv", "full"):
            trained_prefix = basename + "-" + train_type + "_trained-"
            for test_type in ("all", "test-all-cv"):
                target_path = trained_prefix + test_type + ".feather"
                targets.append(luigi.LocalTarget(target_path))

        return targets
Пример #10
0
    def output(self):
        """Return the local target for the current fold's prediction ``.feather`` file."""
        specifiers = [
            self.preprocess_conf,
            self.validation_prop,
            self.k_folds,
            self.features_conf,
            self.model_conf
        ]
        basename = pf.output_name(self.conf, specifiers, "preds_", "preds")
        fold_suffix = "-" + str(self.cur_fold) + ".feather"

        return luigi.LocalTarget(basename + fold_suffix)
Пример #11
0
    def run(self):
        """Run features.R and record the generated feature basename.

        features.R receives the features configuration, the file names of
        the task's first and second inputs, ``self.ps_path`` and the output
        basename, in that order. On success a comma-separated line of the
        specifiers plus the output basename is appended to
        ``features/features.txt`` (resolved via ``pf.processed_data_dir``).

        Raises:
            ValueError: if features.R exits with a nonzero code.
        """
        # NOTE(review): the specifiers are joined with "," below, so they are
        # assumed to be strings -- confirm against the task parameters.
        specifiers_list = [
            self.preprocess_conf,
            self.validation_prop,
            self.k_folds,
            self.features_conf
        ]

        output_path = pf.output_name(
            self.conf,
            specifiers_list,
            "features_",
            "features"
        )

        # The inputs are opened only to obtain their file names; close each
        # handle right away instead of leaking it.
        inputs = self.input()
        with inputs[0].open("r") as first_input:
            first_input_path = first_input.name
        with inputs[1].open("r") as second_input:
            second_input_path = second_input.name

        return_code = subprocess.call(
            [
                "Rscript",
                pf.rscript_file(self.conf, "features.R"),
                self.features_conf,
                first_input_path,
                second_input_path,
                self.ps_path,
                output_path
            ]
        )

        if return_code != 0:
            raise ValueError("features.R failed")

        mapping = pf.processed_data_dir(
            self.conf.get("paths", "project_dir"),
            os.path.join("features", "features.txt")
        )

        # The with-block closes the file; no explicit close() is needed.
        with open(mapping, "a") as f:
            f.write(
                ",".join(specifiers_list + [os.path.basename(output_path)]) + "\n"
            )
Пример #12
0
    def output(self):
        """Return the local targets for all feature files.

        The list starts with ``<basename>-all.feather``, followed by a
        "train" and a "test" file for "all-cv" and for each fold number from
        1 to ``k_folds`` inclusive.
        """
        basename = pf.output_name(
            self.conf,
            [
                self.preprocess_conf,
                self.validation_prop,
                self.k_folds,
                self.features_conf
            ],
            "features_",
            "features"
        )

        fold_labels = ["all-cv"]
        fold_labels.extend(range(1, int(self.k_folds) + 1))

        targets = [luigi.LocalTarget(basename + "-all.feather")]
        targets.extend(
            luigi.LocalTarget(
                basename + "-" + str(split) + "-" + str(fold) + ".feather"
            )
            for fold in fold_labels
            for split in ("train", "test")
        )

        return targets
Пример #13
0
 def output(self):
     """Return the local targets for the two ensemble model ``.RData`` files.

     One file is produced per train type ("cv" and "full"), each named
     ``<prefix>-all-<train_type>_trained.RData``.
     """
     prefix = pf.output_name(self.conf, self.ensemble_id,
                             "ensemble_model-", "models")
     return [
         luigi.LocalTarget(prefix + "-all-" + train_type + "_trained.RData")
         for train_type in ("cv", "full")
     ]