def run(self):
    """Call predict.R for each ensemble training type / test set pairing.

    The feature specifiers are taken from the first experiment listed in
    this ensemble's ``exper_ids``. For every combination of training type
    (``cv``, ``full``) and test set (``all``, ``test-all-cv``), ``predict.R``
    receives the features file, the ensemble model file and the predictions
    path, in that order.

    Raises:
        ValueError: if a ``predict.R`` invocation returns a nonzero exit code.
    """
    ensemble_conf = pf.values_from_conf(self.conf, "ensemble")[self.ensemble_id]
    experiments = pf.values_from_conf(self.conf, "experiment")
    reference_exper = experiments[ensemble_conf["exper_ids"][0]]

    # Specifiers locating the features on which predictions are generated.
    feature_specifiers = [
        reference_exper[field]
        for field in ("preprocessing", "validation_prop", "k_folds", "features")
    ]
    features_base = pf.output_name(
        self.conf, feature_specifiers, "features_", "features"
    )
    preds_base = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_preds_", "preds"
    )
    models_base = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_model-", "models"
    )

    pairings = [
        (train_type, test_type)
        for train_type in ("cv", "full")
        for test_type in ("all", "test-all-cv")
    ]
    for train_type, test_type in pairings:
        features_file = features_base + "-" + test_type + ".feather"
        model_file = models_base + "-all-" + train_type + "_trained.RData"
        preds_file = (
            preds_base + "-" + train_type + "_trained-" + test_type + ".feather"
        )
        command = [
            "Rscript",
            pf.rscript_file(self.conf, "predict.R"),
            features_file,
            model_file,
            preds_file,
        ]
        # Stop at the first failing invocation.
        if subprocess.call(command) != 0:
            raise ValueError("predict.R failed")
def output(self):
    """Return the target for the ``cv_``-prefixed feather file.

    The file name is derived by ``pf.output_name`` from the preprocessing
    configuration, the validation proportion and the number of folds.
    """
    split_specifiers = [self.preprocess_conf, self.validation_prop, self.k_folds]
    cv_basename = pf.output_name(self.conf, split_specifiers, "cv_")
    return luigi.LocalTarget(cv_basename + ".feather")
def run(self):
    """Call predict.R for the current fold.

    ``predict.R`` receives, in order, the features file, the model file and
    the path for the predictions. When ``cur_fold`` is ``"all"`` the features
    file is ``<features basename>-all.feather``; otherwise it is
    ``<features basename>-test-<fold>.feather``.

    Raises:
        ValueError: if ``predict.R`` returns a nonzero exit code.
    """
    specifiers_list = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
        self.features_conf,
        self.model_conf,
    ]
    fold = str(self.cur_fold)

    # Features do not depend on the model, so only the first four specifiers
    # identify them.
    features_base = pf.output_name(
        self.conf, specifiers_list[:4], "features_", "features"
    )
    # Build the "all" file name directly instead of stripping "test-" from
    # the finished path: a replace over the whole path would also rewrite
    # any directory or basename component that happens to contain "test-".
    if fold == "all":
        x_path = features_base + "-all.feather"
    else:
        x_path = features_base + "-test-" + fold + ".feather"

    pred_path = (
        pf.output_name(self.conf, specifiers_list, "preds_", "preds")
        + "-" + fold + ".feather"
    )
    model_path = (
        pf.output_name(self.conf, specifiers_list, "model_", "models")
        + "-" + fold + ".RData"
    )

    return_code = subprocess.call(
        [
            "Rscript",
            pf.rscript_file(self.conf, "predict.R"),
            x_path,
            model_path,
            pred_path,
        ]
    )
    if return_code != 0:
        raise ValueError("predict.R failed")
def run(self):
    """Call ensemble.R on the outputs of this ensemble's experiments.

    For every configured experiment whose id appears in the ensemble's
    ``exper_ids``, the prediction and model basenames are collected. Each
    collection is passed to ``ensemble.R`` as one string in which every
    basename is followed by ``";"``. The script also receives the responses
    basename, the number of folds, the output prefix (ending in ``-all``),
    the ``paths``/``ensemble`` configuration value and the ensemble id.

    Raises:
        ValueError: if no configured experiment belongs to this ensemble, or
            if ``ensemble.R`` returns a nonzero exit code.
    """
    ensemble = pf.values_from_conf(self.conf, "ensemble")[self.ensemble_id]
    exper = pf.values_from_conf(self.conf, "experiment")

    preds_basenames = []
    models_basenames = []
    y_basename = None
    k_folds = None

    # Get paths to the experiment results needed for ensembling.
    for exper_id in exper.keys():
        if exper_id not in ensemble["exper_ids"]:
            continue

        settings = exper[exper_id]
        specifiers_list = [
            settings["preprocessing"],
            settings["validation_prop"],
            settings["k_folds"],
            settings["features"],
            settings["model"],
        ]
        preds_basenames.append(
            pf.output_name(self.conf, specifiers_list, "preds_", "preds")
        )
        models_basenames.append(
            pf.output_name(self.conf, specifiers_list, "model_", "models")
        )

        # These are assumed constant over experiments, so safe to overwrite.
        y_basename = pf.output_name(
            self.conf, specifiers_list[:3], "responses_", "responses"
        )
        k_folds = settings["k_folds"]

    # Without this guard an empty match would surface as an unbound-variable
    # error further down, hiding the actual configuration problem.
    if not preds_basenames:
        raise ValueError(
            "no experiments matched exper_ids of ensemble %s" % self.ensemble_id
        )

    # Now call the ensemble script. Every basename keeps its trailing ";".
    output_prefix = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_model-", "models"
    )
    return_code = subprocess.call(
        [
            "Rscript",
            pf.rscript_file(self.conf, "ensemble.R"),
            ";".join(preds_basenames) + ";",
            ";".join(models_basenames) + ";",
            y_basename,
            str(k_folds),
            output_prefix + "-all",
            self.conf.get("paths", "ensemble"),
            self.ensemble_id,
        ]
    )
    if return_code != 0:
        raise ValueError("ensemble.R failed")
def output(self):
    """Return the ensemble evaluation targets.

    One feather target is produced per combination of training type
    (``cv``, ``full``) and test set (``all``, ``test-all-cv``), ordered with
    the training type varying slowest.
    """
    eval_base = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_eval_", "eval"
    )
    return [
        luigi.LocalTarget(
            eval_base + "-" + train_type + "_trained-" + test_type + ".feather"
        )
        for train_type in ("cv", "full")
        for test_type in ("all", "test-all-cv")
    ]
def output(self):
    """Return the target for the current fold's ``.RData`` model file.

    The basename is derived by ``pf.output_name`` from the preprocessing,
    validation proportion, fold count, features and model settings; the
    current fold is appended as a suffix.
    """
    model_specifiers = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
        self.features_conf,
        self.model_conf,
    ]
    model_base = pf.output_name(self.conf, model_specifiers, "model_", "models")
    fold_suffix = "-" + str(self.cur_fold) + ".RData"
    return luigi.LocalTarget(model_base + fold_suffix)
def run(self):
    """Call train.R for the current fold and record the resulting model.

    ``train.R`` receives, in order, the features file, the responses file,
    the model output path and the model configuration. When ``cur_fold`` is
    ``"all"`` the inputs are the ``-all.feather`` files; otherwise they are
    the ``-train-<fold>.feather`` files. After a successful run, a
    comma-separated line holding the specifiers and the model file's
    basename is appended to ``models/models.txt``.

    Raises:
        ValueError: if ``train.R`` returns a nonzero exit code.
    """
    specifiers_list = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
        self.features_conf,
        self.model_conf,
    ]
    fold = str(self.cur_fold)

    # Build the "all" suffix directly instead of stripping "-train" from the
    # finished paths: a replace over the whole path would also rewrite any
    # directory or basename component that happens to contain "-train".
    if fold == "all":
        data_suffix = "-all.feather"
    else:
        data_suffix = "-train-" + fold + ".feather"

    x_path = (
        pf.output_name(self.conf, specifiers_list[:4], "features_", "features")
        + data_suffix
    )
    y_path = (
        pf.output_name(self.conf, specifiers_list[:3], "responses_", "responses")
        + data_suffix
    )
    result_path = (
        pf.output_name(self.conf, specifiers_list, "model_", "models")
        + "-" + fold + ".RData"
    )

    return_code = subprocess.call(
        [
            "Rscript",
            pf.rscript_file(self.conf, "train.R"),
            x_path,
            y_path,
            result_path,
            self.model_conf,
        ]
    )
    if return_code != 0:
        raise ValueError("train.R failed")

    mapping = pf.processed_data_dir(
        self.conf.get("paths", "project_dir"),
        os.path.join("models", "models.txt"),
    )
    # The with-block closes the file; no explicit close() is needed.
    with open(mapping, "a") as f:
        f.write(
            ",".join(specifiers_list + [os.path.basename(result_path)]) + "\n"
        )
def run(self):
    """Call train_test_split.R on this task's input file.

    The script receives, in order, the input file path, the ``cv_`` feather
    output path, the validation proportion and the number of folds.

    Raises:
        ValueError: if ``train_test_split.R`` returns a nonzero exit code.
    """
    specifiers_list = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
    ]

    # The input is opened only to learn its file name; close the handle
    # right away instead of leaving it open for the rest of the task.
    with self.input().open("r") as input_handle:
        input_path = input_handle.name

    return_code = subprocess.call(
        [
            "Rscript",
            pf.rscript_file(self.conf, "train_test_split.R"),
            input_path,
            pf.output_name(self.conf, specifiers_list, "cv_") + ".feather",
            self.validation_prop,
            self.k_folds,
        ]
    )
    if return_code != 0:
        # Name the script that actually ran.
        raise ValueError("train_test_split.R failed")
def output(self):
    """Return the ensemble prediction targets.

    One feather target is produced per combination of training type
    (``cv``, ``full``) and test set (``all``, ``test-all-cv``), ordered with
    the training type varying slowest.
    """
    preds_base = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_preds_", "preds"
    )
    suffixes = [
        "-" + train_type + "_trained-" + test_type + ".feather"
        for train_type in ("cv", "full")
        for test_type in ("all", "test-all-cv")
    ]
    return [luigi.LocalTarget(preds_base + suffix) for suffix in suffixes]
def output(self):
    """Return the target for the current fold's predictions feather file.

    The basename is derived by ``pf.output_name`` from the preprocessing,
    validation proportion, fold count, features and model settings; the
    current fold is appended as a suffix.
    """
    pred_specifiers = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
        self.features_conf,
        self.model_conf,
    ]
    preds_base = pf.output_name(self.conf, pred_specifiers, "preds_", "preds")
    fold_suffix = "-" + str(self.cur_fold) + ".feather"
    return luigi.LocalTarget(preds_base + fold_suffix)
def run(self):
    """Call features.R and record the resulting features basename.

    ``features.R`` receives, in order, the features configuration, the file
    names of the first two inputs, ``ps_path`` and the output basename.
    After a successful run, a comma-separated line holding the specifiers
    and the output basename is appended to ``features/features.txt``.

    Raises:
        ValueError: if ``features.R`` returns a nonzero exit code.
    """
    specifiers_list = [
        self.preprocess_conf,
        self.validation_prop,
        self.k_folds,
        self.features_conf,
    ]
    output_path = pf.output_name(
        self.conf, specifiers_list, "features_", "features"
    )

    # The inputs are opened only to learn their file names; close each
    # handle right away instead of leaving it open for the rest of the task.
    inputs = self.input()
    with inputs[0].open("r") as first_handle:
        first_input_path = first_handle.name
    with inputs[1].open("r") as second_handle:
        second_input_path = second_handle.name

    return_code = subprocess.call(
        [
            "Rscript",
            pf.rscript_file(self.conf, "features.R"),
            self.features_conf,
            first_input_path,
            second_input_path,
            self.ps_path,
            output_path,
        ]
    )
    if return_code != 0:
        raise ValueError("features.R failed")

    mapping = pf.processed_data_dir(
        self.conf.get("paths", "project_dir"),
        os.path.join("features", "features.txt"),
    )
    # The with-block closes the file; no explicit close() is needed.
    with open(mapping, "a") as f:
        f.write(
            ",".join(specifiers_list + [os.path.basename(output_path)]) + "\n"
        )
def output(self):
    """Return every features target for this configuration.

    The list starts with the ``-all.feather`` file, followed by a ``train``
    and a ``test`` file for the ``all-cv`` label and then for each fold
    numbered 1 through ``k_folds``.
    """
    features_base = pf.output_name(
        self.conf,
        [
            self.preprocess_conf,
            self.validation_prop,
            self.k_folds,
            self.features_conf,
        ],
        "features_",
        "features",
    )

    fold_labels = ["all-cv"]
    fold_labels.extend(range(1, int(self.k_folds) + 1))

    filenames = [features_base + "-all.feather"]
    filenames.extend(
        features_base + "-" + str(split) + "-" + str(fold) + ".feather"
        for fold in fold_labels
        for split in ("train", "test")
    )
    return [luigi.LocalTarget(filename) for filename in filenames]
def output(self):
    """Return the two ensemble model targets (cv-trained, then full-trained)."""
    model_prefix = pf.output_name(
        self.conf, self.ensemble_id, "ensemble_model-", "models"
    )
    targets = []
    for suffix in ("-all-cv_trained.RData", "-all-full_trained.RData"):
        targets.append(luigi.LocalTarget(model_prefix + suffix))
    return targets