MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.client.add_slurm_command_line("--constraint=broadwell")
    MPControl.connect()


if __name__ == '__main__':
    # Bring the distributed engine up before any workflow is constructed.
    set_up_dask()

    utils.Debug.vprint("Testing preprocessing", level=0)

    # Two AMuSR multitask passes that differ only in preprocessing: the
    # first adds no preprocessing step, the second adds a "log2" step.
    # Each entry is (preprocessing steps to add, output sub-directory).
    # NOTE(review): this spot used to be labelled "Figure 5D: BBSR By Task
    # Learning", but the code requests regression="amusr" and writes under
    # figure_4_* - confirm which figure this script belongs to.
    run_plan = (
        ((), 'figure_4_count'),
        (("log2",), 'figure_4_log2'),
    )

    for extra_steps, output_subdir in run_plan:
        # Let go of the previous pass (wrapper first, then worker) before
        # building replacements, so two workflows are never held at once.
        cv_wrap = worker = None

        worker = workflow.inferelator_workflow(workflow="multitask",
                                               regression="amusr")
        set_up_workflow(worker)
        for step in extra_steps:
            worker.add_preprocess_step(step)
        worker.append_to_path('output_dir', output_subdir)

        cv_wrap = set_up_cv_seeds(worker)
        cv_wrap.run()
Exemplo n.º 2
0
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.client.add_slurm_command_line("--constraint=broadwell")
    MPControl.connect()


if __name__ == '__main__':
    # Bring the distributed engine up before any workflow is constructed.
    set_up_dask()

    utils.Debug.vprint("Testing preprocessing", level=0)

    # Figure 5D: BBSR By Task Learning
    # Two passes that differ only in preprocessing: the first adds no
    # preprocessing step, the second adds a "log2" step. Each entry is
    # (preprocessing steps to add, output sub-directory).
    run_plan = (
        ((), 'figure_4_count'),
        (("log2",), 'figure_4_log2'),
    )

    for extra_steps, output_subdir in run_plan:
        # Let go of the previous pass (wrapper first, then worker) before
        # building replacements, so two workflows are never held at once.
        cv_wrap = worker = None

        worker = workflow.inferelator_workflow(workflow="multitask",
                                               regression="bbsr-by-task")
        set_up_workflow(worker)
        for step in extra_steps:
            worker.add_preprocess_step(step)
        worker.append_to_path('output_dir', output_subdir)

        cv_wrap = set_up_cv_seeds(worker)
        # NOTE(review): the run is disabled for both passes, so this script
        # only builds and configures the wrappers - confirm that is intended.
        #cv_wrap.run()
                       priors_file=PRIORS_FILE_NAME,
                       gold_standard_file=GOLD_STANDARD_FILE_NAME)
    wkf.set_expression_file(tsv=EXPRESSION_FILE_NAME)
    wkf.set_file_properties(expression_matrix_columns_are_genes=False)
    wkf.set_run_parameters(num_bootstraps=5)
    wkf.set_crossvalidation_parameters(split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2)
    return wkf


# BBSR inference under cross-validation
# The crossvalidation wrapper reruns the regression once for every seed in
# CV_SEEDS (10 runs according to the original notes), holding 20% of the gold
# standard out of the priors for testing on each run
# A different seed on each run means a different holdout on each run

# Build the BBSR / TFA worker, pass it through the shared set-up, and append
# "bbsr" to its output_dir path
worker = set_up_workflow(
    workflow.inferelator_workflow(workflow="tfa", regression="bbsr")
)
worker.append_to_path("output_dir", "bbsr")

# Wrap the configured worker and register the seeds as the grid-search values
# for random_seed
cv_wrap = CrossValidationManager(worker)
cv_wrap.add_gridsearch_parameter('random_seed', CV_SEEDS)

# Launch the cross-validation runs
cv_wrap.run()

# Inference with Elastic Net (crossvalidation)
# Using the crossvalidation wrapper
# Run the regression 10 times and hold 20% of the gold standard out of the priors for testing each time
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_ccb", n_jobs=n_jobs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.client.add_slurm_command_line("--constraint=broadwell")
    MPControl.connect()


if __name__ == '__main__':
    # Bring the distributed engine up before any workflow is constructed.
    set_up_dask()

    utils.Debug.vprint("Testing preprocessing", level=0)

    # Two BBSR single-cell passes that differ only in preprocessing: the
    # first adds no preprocessing step, the second adds a "log2" step.
    # Each entry is (preprocessing steps to add, output sub-directory).
    # NOTE(review): this spot used to be labelled "Figure 5D: BBSR By Task
    # Learning", but the code requests regression="bbsr" on the
    # "single-cell" workflow and writes under figure_4_* - confirm which
    # figure this script belongs to.
    run_plan = (
        ((), 'figure_4_count'),
        (("log2",), 'figure_4_log2'),
    )

    for extra_steps, output_subdir in run_plan:
        # Let go of the previous pass (wrapper first, then worker) before
        # building replacements, so two workflows are never held at once.
        cv_wrap = worker = None

        worker = workflow.inferelator_workflow(workflow="single-cell",
                                               regression="bbsr")
        set_up_workflow(worker)
        for step in extra_steps:
            worker.add_preprocess_step(step)
        worker.append_to_path('output_dir', output_subdir)

        cv_wrap = set_up_cv_seeds(worker)
        cv_wrap.run()
Exemplo n.º 5
0
# File name handed to set_file_paths as tf_names_file
YEASTRACT_TF_NAMES = "tf_names_yeastract.txt"

# Absolute input directory and base output directory used by this script
INPUT_DIR = '/mnt/ceph/users/cjackson/inferelator/data/yeast'
OUTPUT_PATH = '/mnt/ceph/users/cjackson/jackson_2019_inferelator_v050/'

# Joined onto OUTPUT_PATH to form this run's output_dir
OUTPUT_FOLDER = "tau43_c"

# The dask-cluster engine is configured and connected only when this file is
# executed as a script
if __name__ == '__main__':
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_preempt")
    MPControl.client.set_job_size_params(n_jobs=1)
    # Shell line that activates the "inferelator" conda environment
    # (presumably executed on each worker - confirm against MPControl docs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.connect()

# NOTE(review): everything below sits outside the __main__ guard, so the
# workflow is also built and configured when this module is imported -
# confirm that is intended.
# "stars" regression on the VelocityWorkflow class
wkf = inferelator_workflow("stars", VelocityWorkflow)
wkf.set_file_paths(input_dir=INPUT_DIR,
                   output_dir=os.path.join(OUTPUT_PATH, OUTPUT_FOLDER),
                   gold_standard_file='gold_standard.tsv',
                   priors_file=YEASTRACT_PRIOR,
                   tf_names_file=YEASTRACT_TF_NAMES)
# Expression and velocity both come from DATA_FILE: expression from the
# "counts" layer, velocity from the "pv" layer
wkf.set_expression_file(h5ad=DATA_FILE, h5_layer="counts")
wkf.set_velocity_parameters(velocity_file_name=DATA_FILE,
                            velocity_file_type="h5ad",
                            velocity_file_layer="pv")
# Gold-standard splitting for cross-validation with a split ratio of 0.5
wkf.set_crossvalidation_parameters(
    split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.5)
wkf.set_run_parameters(num_bootstraps=5)
wkf.set_count_minimum(0.05)
# Register single_cell.log2_data as a preprocessing step
wkf.add_preprocess_step(single_cell.log2_data)
# Presumably the value the "tau43" in OUTPUT_FOLDER refers to - TODO confirm
wkf.tau = 43.28
    def setUp(self):
        """Store a workflow built by inferelator_workflow() with no arguments."""
        self.worker = inferelator_workflow()
from inferelator import workflow
from inferelator.distributed.inferelator_mp import MPControl
from inferelator import utils

# Set the verbosity level of inferelator's Debug helper to 1
utils.Debug.set_verbose_level(1)

# Select the "multiprocessing" engine and set its client to 3 processes
MPControl.set_multiprocess_engine("multiprocessing")
MPControl.client.processes = 3

# BBSR regression combined with the "tfa" workflow
wflow = workflow.inferelator_workflow(regression="bbsr", workflow="tfa")
# Common configuration parameters
# input_dir is a relative path, so it resolves against the current working
# directory at run time
wflow.input_dir = 'data/bsubtilis'
wflow.num_bootstraps = 2
wflow.delTmax = 110
wflow.delTmin = 0
wflow.tau = 45

# All configuration above executes on import; only the run itself is guarded
if __name__ == "__main__":
    wflow.run()
Exemplo n.º 8
0
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.minimum_cores = n_cores
    MPControl.client.maximum_cores = n_cores
    MPControl.client.walltime = '48:00:00'
    MPControl.client.add_worker_env_line('module load slurm')
    MPControl.client.add_worker_env_line('module load gcc/8.3.0')
    MPControl.client.add_worker_env_line('source ' + CONDA_ACTIVATE_PATH)
    MPControl.client.cluster_controller_options.append("-p ccb")
    MPControl.connect()


if __name__ == '__main__':
    start_mpcontrol_dask(100)

    for seed in range(42, 52):
        worker = workflow.inferelator_workflow(regression="amusr",
                                               workflow="amusr")

        worker.set_file_paths(input_dir=INPUT_DIR,
                              output_dir=OUTPUT_DIR,
                              gold_standard_file="gold_standard.tsv",
                              gene_metadata_file="orfs.tsv",
                              priors_file=YEASTRACT_PRIOR,
                              tf_names_file=YEASTRACT_TF_NAMES)
        worker.set_file_properties(gene_list_index="SystematicName")
        worker.set_task_filters(target_expression_filter="union",
                                regulator_expression_filter="intersection")
        worker.set_run_parameters(num_bootstraps=5, random_seed=seed)
        worker.set_crossvalidation_parameters(
            split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2)

        worker.append_to_path('output_dir',
Exemplo n.º 9
0
 def test_tfa(self):
     """workflow="tfa" with RegressionWorkflow must yield a TFAWorkFlow instance."""
     from inferelator.tfa_workflow import TFAWorkFlow
     worker = workflow.inferelator_workflow(regression=RegressionWorkflow,
                                            workflow="tfa")
     # assertIsInstance names the offending object and expected class when
     # it fails, where assertTrue(isinstance(...)) only says "False is not true"
     self.assertIsInstance(worker, TFAWorkFlow)
Exemplo n.º 10
0
 def test_singlecell(self):
     """workflow="single-cell" with RegressionWorkflow must yield a SingleCellWorkflow instance."""
     from inferelator.single_cell_workflow import SingleCellWorkflow
     worker = workflow.inferelator_workflow(regression=RegressionWorkflow,
                                            workflow="single-cell")
     # assertIsInstance names the offending object and expected class when
     # it fails, where assertTrue(isinstance(...)) only says "False is not true"
     self.assertIsInstance(worker, SingleCellWorkflow)
Exemplo n.º 11
0
 def test_elasticnet(self):
     """regression="elasticnet" on WorkflowBase must yield an ElasticNetWorkflow instance."""
     from inferelator.regression.elasticnet_python import ElasticNetWorkflow
     worker = workflow.inferelator_workflow(regression="elasticnet",
                                            workflow=workflow.WorkflowBase)
     # assertIsInstance names the offending object and expected class when
     # it fails, where assertTrue(isinstance(...)) only says "False is not true"
     self.assertIsInstance(worker, ElasticNetWorkflow)
Exemplo n.º 12
0
 def test_bbsr(self):
     """regression="bbsr" on WorkflowBase must yield a BBSRRegressionWorkflow instance."""
     from inferelator.regression.bbsr_python import BBSRRegressionWorkflow
     worker = workflow.inferelator_workflow(regression="bbsr",
                                            workflow=workflow.WorkflowBase)
     # assertIsInstance names the offending object and expected class when
     # it fails, where assertTrue(isinstance(...)) only says "False is not true"
     self.assertIsInstance(worker, BBSRRegressionWorkflow)
Exemplo n.º 13
0
 def test_base(self):
     """WorkflowBase with regression=None must raise NotImplementedError from run()."""
     base_worker = workflow.inferelator_workflow(workflow=workflow.WorkflowBase,
                                                 regression=None)
     # Callable form of assertRaises: invokes base_worker.run() and checks
     # that it raises NotImplementedError
     self.assertRaises(NotImplementedError, base_worker.run)
Exemplo n.º 14
0
# Transcription-factor name files; only YEASTRACT_TF_NAMES is used in the
# portion of the script visible here
TF_NAMES = "tf_names_gold_standard.txt"
YEASTRACT_TF_NAMES = "tf_names_yeastract.txt"

# Absolute input directory and base output directory used by this script
INPUT_DIR = '/mnt/ceph/users/cjackson/inferelator/data/yeast'
OUTPUT_PATH = '/mnt/ceph/users/cjackson/jackson_2019_inferelator_v050/'

# The dask-cluster engine is configured and connected only when this file is
# executed as a script
if __name__ == '__main__':
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_preempt")
    MPControl.client.set_job_size_params(n_jobs=1)
    # Shell line that activates the "inferelator" conda environment
    # (presumably executed on each worker - confirm against MPControl docs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.connect()

# NOTE(review): everything below sits outside the __main__ guard, so the
# workflow is also built and configured when this module is imported -
# confirm that is intended.
# "stars" regression on the "single-cell" workflow
wkf = inferelator_workflow("stars", "single-cell")
# Output goes to the "no_velocity" directory under OUTPUT_PATH
wkf.set_file_paths(input_dir=INPUT_DIR,
                   output_dir=os.path.join(OUTPUT_PATH, "no_velocity"),
                   gold_standard_file='gold_standard.tsv',
                   priors_file=YEASTRACT_PRIOR,
                   tf_names_file=YEASTRACT_TF_NAMES)
# Expression is read from the "smooth_count" layer of DATA_FILE
wkf.set_expression_file(h5ad=DATA_FILE, h5_layer="smooth_count")
# Gold-standard splitting for cross-validation with a split ratio of 0.5
wkf.set_crossvalidation_parameters(
    split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.5)
wkf.set_run_parameters(num_bootstraps=5)
wkf.set_count_minimum(0.05)
# Register single_cell.log2_data as a preprocessing step
wkf.add_preprocess_step(single_cell.log2_data)

# Wrap the workflow for cross-validation and register seeds 42..51 as the
# grid-search values for random_seed
cv_wrap = crossvalidation_workflow.CrossValidationManager(wkf)
cv_wrap.add_gridsearch_parameter('random_seed', list(range(42, 52)))
Exemplo n.º 15
0
                            metadata_handler="nonbranching")
    wkf.set_run_parameters(num_bootstraps=5)
    wkf.set_crossvalidation_parameters(
        split_gold_standard_for_crossvalidation=True, cv_split_ratio=0.2)

    wkf.append_to_path('output_dir', "yeast_calico")


if __name__ == '__main__':
    # Configure and connect the dask-cluster engine using the "rusty_ccb"
    # default configuration with n_jobs=3
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.use_default_configuration("rusty_ccb", n_jobs=3)
    # Shell line that activates the "inferelator" conda environment
    # (presumably executed on each worker - confirm against MPControl docs)
    MPControl.client.add_worker_conda(
        "source ~/.local/anaconda3/bin/activate inferelator")
    MPControl.connect()

    # First workflow: "stars" regression on the "single-cell" workflow
    worker = workflow.inferelator_workflow(regression="stars",
                                           workflow="single-cell")
    single_cell_setup(worker)

    # Create a crossvalidation wrapper
    cv_wrap = CrossValidationManager(worker)

    # Assign variables for grid search (seeds 42..51)
    cv_wrap.add_gridsearch_parameter('random_seed', list(range(42, 52)))

    # Run
    # NOTE(review): the run below is commented out, so the single-cell
    # workflow is built, configured and then discarded without executing -
    # confirm that is intended.
    # cv_wrap.run()
    del worker
    del cv_wrap

    # Second workflow: "stars" regression on the "tfa" workflow
    worker = workflow.inferelator_workflow(regression="stars", workflow="tfa")
    calico_setup(worker)
    MPControl.set_multiprocess_engine("dask-cluster")
    MPControl.client.minimum_cores = n_cores
    MPControl.client.maximum_cores = n_cores
    MPControl.client.walltime = '48:00:00'
    MPControl.client.add_worker_env_line('module load slurm')
    MPControl.client.add_worker_env_line('module load gcc/8.3.0')
    MPControl.client.add_worker_env_line('source ' + CONDA_ACTIVATE_PATH)
    MPControl.client.cluster_controller_options.append("-p ccb")
    MPControl.connect()


if __name__ == '__main__':
    start_mpcontrol_dask(100)

    # Keyword sets that are identical for every seed; only random_seed and
    # the output sub-directory change from one pass to the next.
    file_paths = dict(
        input_dir=INPUT_DIR,
        output_dir=OUTPUT_DIR,
        gold_standard_file="gold_standard.tsv",
        gene_metadata_file="orfs.tsv",
        priors_file=YEASTRACT_PRIOR,
        tf_names_file=YEASTRACT_TF_NAMES,
    )
    task_filters = dict(
        target_expression_filter="union",
        regulator_expression_filter="intersection",
    )
    crossvalidation = dict(
        split_gold_standard_for_crossvalidation=True,
        cv_split_ratio=0.2,
    )

    # One workflow per seed, for seeds 42 through 51
    for seed in range(42, 52):
        worker = workflow.inferelator_workflow(
            workflow="amusr", regression=BBSRByTaskRegressionWorkflow)
        worker.set_file_paths(**file_paths)
        worker.set_file_properties(gene_list_index="SystematicName")
        worker.set_task_filters(**task_filters)
        worker.set_run_parameters(random_seed=seed, num_bootstraps=5)
        worker.set_crossvalidation_parameters(**crossvalidation)

        # Each seed gets its own entry appended to the output_dir path
        seed_subdir = "hybrid_bbsr_mtl_seed_" + str(seed)
        worker.append_to_path('output_dir', seed_subdir)
Exemplo n.º 17
0
 def setUp(self):
     # Fixture: an "amusr" workflow paired with "amusr" regression, stored on
     # the test case and aliased locally for the configuration below
     self.workflow = amusr_wkf = workflow.inferelator_workflow(
         regression="amusr", workflow="amusr")
     # Swap create_output_dir for a do-nothing callable that accepts any
     # positional arguments
     amusr_wkf.create_output_dir = lambda *x: None
     # Work on a copy so the shared TaskDataStub.priors_data is left untouched
     amusr_wkf.gold_standard = TaskDataStub.priors_data.copy()