def run(self, methods, num_causal, num_repeats, num_pcs, description, runner, seed=None, plot_fn=None):
        """
        Run the simulation repeats, combine the results per method, and plot them.

        One ``semisynth_simulations.compute_core`` task is dispatched per repeat
        through ``distributed_map.d_map`` on ``runner``. The raw outputs are
        saved, then ``semisynth_simulations.combine_results`` is run once per
        method on a ``Local`` runner; the combined output is saved and handed to
        ``visualize_reduced_results``.

        :param methods: forwarded unchanged to every ``compute_core`` task
        :param num_causal: forwarded to ``compute_core``; also used in the
            results file name and in the plot title
        :param num_repeats: number of tasks to dispatch (``sim_id`` runs over
            ``range(num_repeats)``); also used in the file name and plot title
        :param num_pcs: forwarded to ``compute_core``
        :param description: prefix of the results file name
        :param runner: runner handed to ``d_map`` for the per-repeat tasks
        :param seed: forwarded to ``compute_core``
        :param plot_fn: forwarded to ``visualize_reduced_results``

        :returns: the output of the combine step, as returned by ``d_map``
        """
        self.precompute_pca()

        # ---- map step: one task per repeat ----
        snp_files = [self.snp_fn + suffix for suffix in (".bed", ".fam", ".bim")]
        needed_files = snp_files + [self.eigen_fn]
        per_repeat_args = []
        for sim_id in range(num_repeats):
            per_repeat_args.append((methods, self.snp_fn, self.eigen_fn, num_causal, num_pcs, seed, sim_id))
        raw_outputs = distributed_map.d_map(semisynth_simulations.compute_core, per_repeat_args, runner, input_files=needed_files)

        # ---- persist the raw outputs ----
        raw_fn = "%s_results.runs_%i.causals_%i.pickle.bzip" % (description, num_repeats, num_causal)
        # NOTE: str.replace substitutes every "runs" occurrence, so a "runs"
        # inside ``description`` is rewritten as well.
        combined_fn = raw_fn.replace("runs", "reduced.runs")

        save(raw_fn, raw_outputs)

        # ---- reduce step: one task per method ----
        # Method names are taken from the first repeat's output, not from the
        # ``methods`` argument.
        method_names = raw_outputs[0][0].keys()
        per_method_args = [(name, raw_fn) for name in method_names]

        # The reduce always runs on a local runner; a Hadoop configuration is
        # kept below for reference.
        #reduce_runner = Hadoop(len(methods), mapmemory=90*1024, reducememory=90*1024, mkl_num_threads=1, queue="shared")
        local_runner = Local()
        combined = distributed_map.d_map(semisynth_simulations.combine_results, per_method_args, local_runner, input_files=[raw_fn])

        save(combined_fn, combined)
        plot_title = "%i causal, %i repeats" % (num_causal, num_repeats)
        visualize_reduced_results(method_names, combined, title=plot_title, plot_fn=plot_fn)

        return combined
    def run(self,
            methods,
            num_causal,
            num_repeats,
            num_pcs,
            description,
            runner,
            seed=None,
            plot_fn=None):
        """
        Run the simulation repeats, combine the results per method, and plot them.

        The per-repeat work (``semisynth_simulations.compute_core``) is
        dispatched through ``distributed_map.d_map`` on ``runner``; the
        per-method combine (``semisynth_simulations.combine_results``) is
        dispatched on a ``Local`` runner. Both the raw and the combined outputs
        are written with ``save``.

        :param methods: forwarded unchanged to every ``compute_core`` task
        :param num_causal: forwarded to ``compute_core``; also part of the
            results file name and the plot title
        :param num_repeats: number of per-repeat tasks (``sim_id`` runs over
            ``range(num_repeats)``); also part of the file name and plot title
        :param num_pcs: forwarded to ``compute_core``
        :param description: prefix of the results file name
        :param runner: runner handed to ``d_map`` for the per-repeat tasks
        :param seed: forwarded to ``compute_core``
        :param plot_fn: forwarded to ``visualize_reduced_results``

        :returns: the output of the combine step, as returned by ``d_map``
        """
        self.precompute_pca()

        # Files passed along with the per-repeat tasks: the three SNP files
        # plus the eigen file.
        snp_files = [self.snp_fn + suffix
                     for suffix in (".bed", ".fam", ".bim")]
        needed_files = snp_files + [self.eigen_fn]

        per_repeat_args = [
            (methods, self.snp_fn, self.eigen_fn, num_causal, num_pcs, seed,
             sim_id)
            for sim_id in range(num_repeats)
        ]
        raw_outputs = distributed_map.d_map(
            semisynth_simulations.compute_core,
            per_repeat_args,
            runner,
            input_files=needed_files)

        raw_fn = "%s_results.runs_%i.causals_%i.pickle.bzip" % (
            description, num_repeats, num_causal)
        # NOTE: str.replace substitutes every "runs" occurrence, so a "runs"
        # inside ``description`` is rewritten as well.
        combined_fn = raw_fn.replace("runs", "reduced.runs")

        save(raw_fn, raw_outputs)

        # Method names come from the first repeat's output, not from the
        # ``methods`` argument.
        method_names = raw_outputs[0][0].keys()
        per_method_args = [(name, raw_fn) for name in method_names]

        # The combine step always runs locally; a Hadoop configuration is kept
        # below for reference.
        #reduce_runner = Hadoop(len(methods), mapmemory=90*1024, reducememory=90*1024, mkl_num_threads=1, queue="shared")
        local_runner = Local()
        combined = distributed_map.d_map(
            semisynth_simulations.combine_results,
            per_method_args,
            local_runner,
            input_files=[raw_fn])

        save(combined_fn, combined)
        plot_title = "%i causal, %i repeats" % (num_causal, num_repeats)
        visualize_reduced_results(method_names,
                                  combined,
                                  title=plot_title,
                                  plot_fn=plot_fn)

        return combined
    def test_local_multiproc(self):
        """
        d_map over self.args on a LocalMultiProc(4) runner returns the expected list
        """

        expected = ['', 'A', 'AA', 'AAA', 'AAAB', 'AAABB', 'AAABBB', 'AAABBBC', 'AAABBBCC', 'AAABBBCCC']

        # run locally with LocalMultiProc(4)
        multiproc_runner = LocalMultiProc(4)
        observed = d_map(dummy, self.args, multiproc_runner, input_files=[self.fn])

        # list equality: both the values and their order must match
        assert expected == observed
Exemplo n.º 4
0
    def test_local_multiproc(self):
        """
        d_map over self.args on a LocalMultiProc(4) runner returns the expected list
        """

        expected = [
            '', 'A', 'AA', 'AAA', 'AAAB', 'AAABB', 'AAABBB', 'AAABBBC',
            'AAABBBCC', 'AAABBBCCC'
        ]

        # run locally with LocalMultiProc(4)
        multiproc_runner = LocalMultiProc(4)
        observed = d_map(dummy,
                         self.args,
                         multiproc_runner,
                         input_files=[self.fn])

        # list equality: both the values and their order must match
        assert expected == observed
def simulate_ascertained(methods, prevalence, iid_count, num_causal, num_repeats, description, snp_args, phenotype_args, runner=None, seed=None, plot_fn=None):
    """
    run a synthetic simulation using ascertained data

    :param methods: A list of functions implementing methods to be compared.
    :type methods: list<function>

    :param prevalence: Prior probability of a case, e.g. .1
    :type prevalence: a float between 0.0 and 1.0 (exclusive)

    :param iid_count: The number of individuals to generate.
    :type iid_count: int

    :param num_causal: The number of causal SNPs in the simulation.
    :type num_causal: int

    :param num_repeats: The number of repeats in the simulation.
    :type num_repeats: int

    :param description: Short description string of experiment (for output)
    :type description: str

    :param snp_args: arguments for an internal call to :func:`GWAS_benchmark.snp_gen`. Do not include
    'iid_count' or 'seed'
    :type snp_args: dictionary

    :param phenotype_args: arguments for an internal call to :func:`.generate_phenotype`. Do not include
    'snp_count' or 'seed'
    :type phenotype_args: dictionary

    :param runner: a Runner object (e.g. Local, Hadoop, HPC) used for the per-repeat tasks.
    If None (the default), a new ``Local()`` runner is created for this call.
    :type runner: Runner

    :param seed: a random seed to control random number generation
    :type seed: int

    :param plot_fn: filename under which to save the output figure
    :type plot_fn: str

    :returns: the output of the per-method combine step, as returned by ``distributed_map.d_map``
    """

    # Build the default runner per call. A ``Local()`` default in the signature
    # is evaluated once, when the function is defined, and that single instance
    # would then be shared by every call that omits ``runner``.
    if runner is None:
        runner = Local()

    # Map step: one task per repeat, identified by sim_id.
    input_args = [(methods, num_causal, prevalence, iid_count, snp_args, phenotype_args, seed, sim_id) for sim_id in range(num_repeats)]
    output_list = distributed_map.d_map(semisynth_simulations.compute_core_ascertained, input_args, runner)

    ############################################
    results_fn = "%s_ascertained_results.runs_%i.causals_%i.pickle.bzip" % (description, num_repeats, num_causal)
    # NOTE: str.replace substitutes every "runs" occurrence, so a "runs" inside
    # ``description`` is rewritten as well.
    reduced_results_fn = results_fn.replace("runs", "reduced.runs")

    save(results_fn, output_list)

    # Method names are taken from the first repeat's output, not from the
    # ``methods`` argument.
    method_names = output_list[0][0].keys()
    arg_list = [(method, results_fn) for method in method_names]

    # Reduce step: one combine task per method, always on a local runner.
    combine_output = distributed_map.d_map(semisynth_simulations.combine_results, arg_list, Local(), input_files=[results_fn])

    save(reduced_results_fn, combine_output)
    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names, combine_output, title=title, plot_fn=plot_fn)

    return combine_output
def simulate_ascertained(methods,
                         prevalence,
                         iid_count,
                         num_causal,
                         num_repeats,
                         description,
                         snp_args,
                         phenotype_args,
                         runner=None,
                         seed=None,
                         plot_fn=None):
    """
    run a synthetic simulation using ascertained data

    :param methods: A list of functions implementing methods to be compared.
    :type methods: list<function>

    :param prevalence: Prior probability of a case, e.g. .1
    :type prevalence: a float between 0.0 and 1.0 (exclusive)

    :param iid_count: The number of individuals to generate.
    :type iid_count: int

    :param num_causal: The number of causal SNPs in the simulation.
    :type num_causal: int

    :param num_repeats: The number of repeats in the simulation.
    :type num_repeats: int

    :param description: Short description string of experiment (for output)
    :type description: str

    :param snp_args: arguments for an internal call to :func:`GWAS_benchmark.snp_gen`. Do not include
    'iid_count' or 'seed'
    :type snp_args: dictionary

    :param phenotype_args: arguments for an internal call to :func:`.generate_phenotype`. Do not include
    'snp_count' or 'seed'
    :type phenotype_args: dictionary

    :param runner: a Runner object (e.g. Local, Hadoop, HPC) used for the per-repeat tasks.
    If None (the default), a new ``Local()`` runner is created for this call.
    :type runner: Runner

    :param seed: a random seed to control random number generation
    :type seed: int

    :param plot_fn: filename under which to save the output figure
    :type plot_fn: str

    :returns: the output of the per-method combine step, as returned by ``distributed_map.d_map``
    """

    # Build the default runner per call. A ``Local()`` default in the signature
    # is evaluated once, when the function is defined, and that single instance
    # would then be shared by every call that omits ``runner``.
    if runner is None:
        runner = Local()

    # Map step: one task per repeat, identified by sim_id.
    input_args = [(methods, num_causal, prevalence, iid_count, snp_args,
                   phenotype_args, seed, sim_id)
                  for sim_id in range(num_repeats)]
    output_list = distributed_map.d_map(
        semisynth_simulations.compute_core_ascertained, input_args, runner)

    ############################################
    results_fn = "%s_ascertained_results.runs_%i.causals_%i.pickle.bzip" % (
        description, num_repeats, num_causal)
    # NOTE: str.replace substitutes every "runs" occurrence, so a "runs" inside
    # ``description`` is rewritten as well.
    reduced_results_fn = results_fn.replace("runs", "reduced.runs")

    save(results_fn, output_list)

    # Method names are taken from the first repeat's output, not from the
    # ``methods`` argument.
    method_names = output_list[0][0].keys()
    arg_list = [(method, results_fn) for method in method_names]

    # Reduce step: one combine task per method, always on a local runner.
    combine_output = distributed_map.d_map(
        semisynth_simulations.combine_results,
        arg_list,
        Local(),
        input_files=[results_fn])

    save(reduced_results_fn, combine_output)
    title = "%i causal, %i repeats" % (num_causal, num_repeats)
    visualize_reduced_results(method_names,
                              combine_output,
                              title=title,
                              plot_fn=plot_fn)

    return combine_output