def run(self, methods, num_causal, num_repeats, num_pcs, description, runner,
        seed=None, plot_fn=None):
    """
    run `num_repeats` simulations, combine the results per method and plot them

    The map step runs :func:`semisynth_simulations.compute_core` once per
    simulation id on `runner`. The raw outputs are saved, then
    :func:`semisynth_simulations.combine_results` is run once per method
    (always with a Local runner) and its output is saved as well.

    :param methods: forwarded unchanged to compute_core
        (presumably the methods to be compared -- confirm against compute_core)

    :param num_causal: forwarded to compute_core and used in the output
        file names and the plot title
    :type num_causal: int

    :param num_repeats: number of simulations to run (simulation ids are
        0 .. num_repeats-1); must be at least 1
    :type num_repeats: int

    :param num_pcs: forwarded unchanged to compute_core
        (presumably the number of principal components -- confirm)

    :param description: prefix of the two output file names
    :type description: str

    :param runner: runner used for the map step

    :param seed: forwarded unchanged to compute_core

    :param plot_fn: forwarded to visualize_reduced_results

    :raises ValueError: if num_repeats is smaller than 1

    :return: the output of the combine step (one d_map task per method)
    """

    if num_repeats < 1:
        # the method names are read from output_list[0], so at least one
        # simulation is required; fail before any expensive work is started
        raise ValueError("num_repeats must be at least 1, got %r" % (num_repeats,))

    self.precompute_pca()

    # files the map step needs: the three SNP files plus the eigen file
    input_files = [self.snp_fn + ext for ext in (".bed", ".fam", ".bim")]
    input_files.append(self.eigen_fn)

    input_args = [
        (methods, self.snp_fn, self.eigen_fn, num_causal, num_pcs, seed, sim_id)
        for sim_id in range(num_repeats)
    ]
    output_list = distributed_map.d_map(
        semisynth_simulations.compute_core, input_args, runner,
        input_files=input_files)

    ############################################
    # Both names are built from their own format string. Deriving the second
    # one with str.replace("runs", ...) would also rewrite any "runs" that
    # happens to occur inside `description`.
    name_parts = (description, num_repeats, num_causal)
    results_fn = "%s_results.runs_%i.causals_%i.pickle.bzip" % name_parts
    reduced_results_fn = "%s_results.reduced.runs_%i.causals_%i.pickle.bzip" % name_parts

    save(results_fn, output_list)

    # the method names come from the first simulation's output, not from the
    # `methods` argument; list() turns the keys view into a stable sequence
    method_names = list(output_list[0][0].keys())
    arg_list = [(method, results_fn) for method in method_names]

    # the combine step always uses a Local runner, independent of `runner`
    reduce_runner = Local()
    combine_output = distributed_map.d_map(
        semisynth_simulations.combine_results, arg_list, reduce_runner,
        input_files=[results_fn])

    save(reduced_results_fn, combine_output)

    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names, combine_output, title=title, plot_fn=plot_fn)

    return combine_output
def run(self, methods, num_causal, num_repeats, num_pcs, description, runner,
        seed=None, plot_fn=None):
    """
    run `num_repeats` simulations, combine the results per method and plot them

    The map step runs :func:`semisynth_simulations.compute_core` once per
    simulation id on `runner`. The raw outputs are saved, then
    :func:`semisynth_simulations.combine_results` is run once per method
    (always with a Local runner) and its output is saved as well.

    :param methods: forwarded unchanged to compute_core
        (presumably the methods to be compared -- confirm against compute_core)

    :param num_causal: forwarded to compute_core and used in the output
        file names and the plot title
    :type num_causal: int

    :param num_repeats: number of simulations to run (simulation ids are
        0 .. num_repeats-1); must be at least 1
    :type num_repeats: int

    :param num_pcs: forwarded unchanged to compute_core
        (presumably the number of principal components -- confirm)

    :param description: prefix of the two output file names
    :type description: str

    :param runner: runner used for the map step

    :param seed: forwarded unchanged to compute_core

    :param plot_fn: forwarded to visualize_reduced_results

    :raises ValueError: if num_repeats is smaller than 1

    :return: the output of the combine step (one d_map task per method)
    """

    if num_repeats < 1:
        # the method names are read from output_list[0], so at least one
        # simulation is required; fail before any expensive work is started
        raise ValueError("num_repeats must be at least 1, got %r" % (num_repeats,))

    self.precompute_pca()

    # files the map step needs: the three SNP files plus the eigen file
    input_files = [self.snp_fn + ext for ext in (".bed", ".fam", ".bim")]
    input_files.append(self.eigen_fn)

    input_args = [
        (methods, self.snp_fn, self.eigen_fn, num_causal, num_pcs, seed, sim_id)
        for sim_id in range(num_repeats)
    ]
    output_list = distributed_map.d_map(
        semisynth_simulations.compute_core, input_args, runner,
        input_files=input_files)

    ############################################
    # Both names are built from their own format string. Deriving the second
    # one with str.replace("runs", ...) would also rewrite any "runs" that
    # happens to occur inside `description`.
    name_parts = (description, num_repeats, num_causal)
    results_fn = "%s_results.runs_%i.causals_%i.pickle.bzip" % name_parts
    reduced_results_fn = "%s_results.reduced.runs_%i.causals_%i.pickle.bzip" % name_parts

    save(results_fn, output_list)

    # the method names come from the first simulation's output, not from the
    # `methods` argument; list() turns the keys view into a stable sequence
    method_names = list(output_list[0][0].keys())
    arg_list = [(method, results_fn) for method in method_names]

    # the combine step always uses a Local runner, independent of `runner`
    reduce_runner = Local()
    combine_output = distributed_map.d_map(
        semisynth_simulations.combine_results, arg_list, reduce_runner,
        input_files=[results_fn])

    save(reduced_results_fn, combine_output)

    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names, combine_output, title=title, plot_fn=plot_fn)

    return combine_output
def test_local_multiproc(self):
    """
    check d_map with a LocalMultiProc runner against the known outputs
    """

    # the expected outputs are all prefixes of this string, from '' up to
    # the full string (ten entries)
    full = "AAABBBCCC"
    expected = [full[:end] for end in range(len(full) + 1)]

    # run on 4 cores locally
    outputs = d_map(dummy, self.args, LocalMultiProc(4), input_files=[self.fn])

    assert expected == outputs
def test_local_multiproc(self):
    """
    check d_map with a LocalMultiProc runner against the known outputs
    """

    # the expected outputs are all prefixes of this string, from '' up to
    # the full string (ten entries)
    full = "AAABBBCCC"
    expected = [full[:end] for end in range(len(full) + 1)]

    # run on 4 cores locally
    outputs = d_map(dummy, self.args, LocalMultiProc(4), input_files=[self.fn])

    assert expected == outputs
def simulate_ascertained(methods, prevalence, iid_count, num_causal, num_repeats,
                         description, snp_args, phenotype_args, runner=None,
                         seed=None, plot_fn=None):
    """
    run a synthetic simulation using ascertained data

    The map step runs :func:`semisynth_simulations.compute_core_ascertained`
    once per simulation id on `runner`. The raw outputs are saved, then
    :func:`semisynth_simulations.combine_results` is run once per method
    (always with a Local runner) and its output is saved as well.

    :param methods: A list of functions implementing methods to be compared.
    :type methods: list<function>

    :param prevalence: Prior probability of a case, e.g. .1
    :type prevalence: a float between 0.0 and 1.0 (exclusive)

    :param iid_count: The number of individuals to generate.
    :type iid_count: int

    :param num_causal: The number causal SNPs in the simulation.
    :type num_causal: int

    :param num_repeats: The number of repeats in the simulation (at least 1).
    :type num_repeats: int

    :param description: Short description string of experiment (for output)
    :type description: str

    :param snp_args: arguments for an internal call to :func:`GWAS_benchmark.snp_gen`. Do not include
        'iid_count' or 'seed'
    :type snp_args: dictionary

    :param phenotype_args: arguments for an internal call to :func:`.generate_phenotype`. Do not include
        'snp_count' or 'seed'
    :type phenotype_args: dictionary

    :param runner: a Runner object (e.g. Local, Hadoop, HPC); when omitted
        (or None) a new Local() runner is created for this call
    :type runner: Runner

    :param seed: a random seed to control random number generation
    :type seed: int

    :param plot_fn: filename under which to save the output figure
    :type plot_fn: str

    :raises ValueError: if num_repeats is smaller than 1

    :return: the output of the combine step (one d_map task per method)
    """

    if num_repeats < 1:
        # the method names are read from output_list[0], so at least one
        # simulation is required; fail before any work is started
        raise ValueError("num_repeats must be at least 1, got %r" % (num_repeats,))

    if runner is None:
        # created per call: a `runner=Local()` default would be evaluated once
        # when the function is defined and then shared by every call
        runner = Local()

    input_args = [
        (methods, num_causal, prevalence, iid_count, snp_args, phenotype_args, seed, sim_id)
        for sim_id in range(num_repeats)
    ]
    output_list = distributed_map.d_map(
        semisynth_simulations.compute_core_ascertained, input_args, runner)

    ############################################
    # Both names are built from their own format string. Deriving the second
    # one with str.replace("runs", ...) would also rewrite any "runs" that
    # happens to occur inside `description`.
    name_parts = (description, num_repeats, num_causal)
    results_fn = "%s_ascertained_results.runs_%i.causals_%i.pickle.bzip" % name_parts
    reduced_results_fn = "%s_ascertained_results.reduced.runs_%i.causals_%i.pickle.bzip" % name_parts

    save(results_fn, output_list)

    # the method names come from the first simulation's output, not from the
    # `methods` argument; list() turns the keys view into a stable sequence
    method_names = list(output_list[0][0].keys())
    arg_list = [(method, results_fn) for method in method_names]

    # the combine step always uses a Local runner, independent of `runner`
    combine_output = distributed_map.d_map(
        semisynth_simulations.combine_results, arg_list, Local(),
        input_files=[results_fn])

    save(reduced_results_fn, combine_output)

    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names, combine_output, title=title, plot_fn=plot_fn)

    return combine_output
def simulate_ascertained(methods, prevalence, iid_count, num_causal, num_repeats,
                         description, snp_args, phenotype_args, runner=None,
                         seed=None, plot_fn=None):
    """
    run a synthetic simulation using ascertained data

    The map step runs :func:`semisynth_simulations.compute_core_ascertained`
    once per simulation id on `runner`. The raw outputs are saved, then
    :func:`semisynth_simulations.combine_results` is run once per method
    (always with a Local runner) and its output is saved as well.

    :param methods: A list of functions implementing methods to be compared.
    :type methods: list<function>

    :param prevalence: Prior probability of a case, e.g. .1
    :type prevalence: a float between 0.0 and 1.0 (exclusive)

    :param iid_count: The number of individuals to generate.
    :type iid_count: int

    :param num_causal: The number causal SNPs in the simulation.
    :type num_causal: int

    :param num_repeats: The number of repeats in the simulation (at least 1).
    :type num_repeats: int

    :param description: Short description string of experiment (for output)
    :type description: str

    :param snp_args: arguments for an internal call to :func:`GWAS_benchmark.snp_gen`. Do not include
        'iid_count' or 'seed'
    :type snp_args: dictionary

    :param phenotype_args: arguments for an internal call to :func:`.generate_phenotype`. Do not include
        'snp_count' or 'seed'
    :type phenotype_args: dictionary

    :param runner: a Runner object (e.g. Local, Hadoop, HPC); when omitted
        (or None) a new Local() runner is created for this call
    :type runner: Runner

    :param seed: a random seed to control random number generation
    :type seed: int

    :param plot_fn: filename under which to save the output figure
    :type plot_fn: str

    :raises ValueError: if num_repeats is smaller than 1

    :return: the output of the combine step (one d_map task per method)
    """

    if num_repeats < 1:
        # the method names are read from output_list[0], so at least one
        # simulation is required; fail before any work is started
        raise ValueError("num_repeats must be at least 1, got %r" % (num_repeats,))

    if runner is None:
        # created per call: a `runner=Local()` default would be evaluated once
        # when the function is defined and then shared by every call
        runner = Local()

    input_args = [
        (methods, num_causal, prevalence, iid_count, snp_args, phenotype_args, seed, sim_id)
        for sim_id in range(num_repeats)
    ]
    output_list = distributed_map.d_map(
        semisynth_simulations.compute_core_ascertained, input_args, runner)

    ############################################
    # Both names are built from their own format string. Deriving the second
    # one with str.replace("runs", ...) would also rewrite any "runs" that
    # happens to occur inside `description`.
    name_parts = (description, num_repeats, num_causal)
    results_fn = "%s_ascertained_results.runs_%i.causals_%i.pickle.bzip" % name_parts
    reduced_results_fn = "%s_ascertained_results.reduced.runs_%i.causals_%i.pickle.bzip" % name_parts

    save(results_fn, output_list)

    # the method names come from the first simulation's output, not from the
    # `methods` argument; list() turns the keys view into a stable sequence
    method_names = list(output_list[0][0].keys())
    arg_list = [(method, results_fn) for method in method_names]

    # the combine step always uses a Local runner, independent of `runner`
    combine_output = distributed_map.d_map(
        semisynth_simulations.combine_results, arg_list, Local(),
        input_files=[results_fn])

    save(reduced_results_fn, combine_output)

    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names, combine_output, title=title, plot_fn=plot_fn)

    return combine_output