Example #1
    def run(self):
        checkpoint_manager = options().checkpoint
        assert isinstance(checkpoint_manager, CheckPointManager)

        t = time.time()

        if checkpoint_manager.is_recovering:
            checkpoint_manager.restore_checkpoint()
            self.root_problem = \
                checkpoint_manager.checkpoint_state.root_problem
            self.check_outputprefix()
        else:
            '''check input arguments'''
            self.check_options()

            '''build the problem structure'''
            self.root_problem = self.build_subproblems()

            '''build jobs'''
            self.build_jobs()

        '''connect jobs into a DAG'''
        self.connect_jobs()

        '''Queue up first level jobs (i.e. those with no dependency).
        Once these run, they should automatically enqueue the rest of the
        DAG through joins and callbacks '''
        self.enqueue_firstlevel_job()

        '''start the checkpointing (has an effect only in
           checkpointing mode)'''
        checkpoint_manager.start_checkpointing(self.root_problem)

        '''Wait for all jobs to finish'''
        if not JobPool().wait_for_all_jobs():
            _LOG.error(
                "There have been errors in executed jobs. Terminating.")
            sys.exit(1)

        '''Terminate the job pool and release memory'''
        JobPool().terminate()

        '''Pause checkpointing'''
        checkpoint_manager.pause_checkpointing()
        # checkpoint_manager.force_checkpoint()

        '''Merge results into final outputs'''
        self.merge_results()

        '''Output final results'''
        self.output_results()

        '''Stop checkpointing'''
        checkpoint_manager.stop_checkpointing()

        _LOG.info("Current execution Finished in %d seconds"
                  % (time.time() - t))
        _LOG.info(
            "All checkpointed executions Finished in %d cumulative time" %
            (checkpoint_manager.get_total_time()))
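
The run() method above drives everything through a small checkpointing interface. As a reading aid, here is a minimal stub that satisfies every call the method makes; the attribute and method names are taken from the snippet, while everything else (signatures, behavior) is assumed, so treat it as an illustrative skeleton rather than SEPP's real CheckPointManager.

import time


class CheckPointState(object):
    """Assumed container; the snippet only reads .root_problem."""

    def __init__(self, root_problem=None):
        self.root_problem = root_problem


class CheckPointManagerStub(object):
    """Minimal stand-in exposing the interface used by run() above."""

    def __init__(self):
        self.is_recovering = False  # True when resuming an earlier run
        self.checkpoint_state = CheckPointState()
        self._start = None

    def restore_checkpoint(self):
        pass  # the real manager reloads saved state from disk

    def start_checkpointing(self, root_problem):
        self._start = time.time()  # the real manager saves periodically

    def pause_checkpointing(self):
        pass

    def stop_checkpointing(self):
        pass

    def get_total_time(self):
        return time.time() - (self._start or time.time())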
Example #2
def run():
    global root_problem
    # Reset the singleton JobPool so this run gets a fresh pool with 2 CPUs
    JobPool().terminate()
    JobPool().__init__(2)
    JobPool(2)
    '''build the problem structure'''
    root_problem = build_subproblems()
    '''build the DAG of jobs'''
    build_job_dag(root_problem)
    '''All buildmodel jobs are ready to be started (i.e. no dependency).
    Queue them up. Once they run, they will automatically enqueue the rest of
    the DAG through joins and callbacks '''
    def enqueue_buildmodel_job(problem):
        if problem.parent is not None:
            JobPool().enqueue_job(problem.jobs["buildmodel"])
        for child in problem.children:
            enqueue_buildmodel_job(child)

    enqueue_buildmodel_job(root_problem)
    '''Wait for all jobs to finish'''
    JobPool().wait_for_all_jobs()
    ''' print out the results of summarize jobs. We could merge the
    results from all summarize jobs here if we wanted '''
    for problem in root_problem.iter_nodes_at_level(SUMMERIZE_LEVEL):
        print(trimstr(problem.get_job_result_by_name("buildmodel")),
              trimstr(problem.get_job_result_by_name("summarize")))
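
Note the first three lines of run(): because JobPool is a singleton (see Example #17), constructing it with a different CPU count after it already exists raises an exception, so this example (like Example #6) terminates the existing pool and re-runs __init__ on the singleton before asking for a two-CPU pool. A small hypothetical helper capturing that pattern, using exactly the three calls shown above (the helper name and the sepp.scheduler import path are my assumptions):

from sepp.scheduler import JobPool  # assumed import path


def reset_job_pool(cpus):
    """Re-initialize the singleton JobPool with a new CPU count."""
    JobPool().terminate()     # stop the workers of the existing pool
    JobPool().__init__(cpus)  # re-create the singleton's workers in place
    return JobPool(cpus)      # now consistent with the requested count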
Example #3
    def testError(self):
        find_job = TestExternalJob()
        find_job.pattern = "somerandomdirectorywewillneverhavehere_ordowe"
        ''' Let's ignore this error in subsequent test cases. '''
        find_job.ignore_error = True
        try:
            JobPool().enqueue_job(find_job)
            assert JobPool().wait_for_all_jobs() is False
        except JobError as e:
            assert str(e).find("No such file or directory") != -1, \
                "The error we expected is no such file or directory"

        assert JobPool().get_asynch_result_object(find_job).successful() is \
            False, "We expected the job to fail"
Example #4
    def perform(self):
        '''
        Distributes fragments to the alignment subsets with the best score,
        and runs align jobs on those. Also, creates new chunks of fragments
        for better parallelism.
        '''

        ''' Figure out which fragment should go to which subproblem'''
        self.figureout_fragment_subset()

        ''' For each alignment subproblem,
        1) make sure its fragments are evenly distributed to fragment chunks.
        2) Set up alignment jobs for its children and enqueue them'''
        alg_problems = [alg for p in self.root_problem.children
                        for alg in p.children]
        for alg_problem in alg_problems:
            assert isinstance(alg_problem, SeppProblem)
            chunks = len(alg_problem.get_children())
            fragment_chunks = alg_problem.fragments.divide_to_equal_chunks(
                chunks)

            ''' Now set up alignment jobs and enqueue them'''
            for (i, fragment_chunk_problem) in enumerate(alg_problem.children):
                fragment_chunk_problem.fragments = fragment_chunks[i]
                aj = fragment_chunk_problem.jobs['hmmalign']
                assert isinstance(aj, HMMAlignJob)
                ''' First, finish setting up the alignment job'''
                aj.hmmmodel = alg_problem.get_job_result_by_name('hmmbuild')
                aj.base_alignment = alg_problem.jobs["hmmbuild"].infile

                if fragment_chunk_problem.fragments is None or \
                        fragment_chunk_problem.fragments.is_empty():
                    aj.fake_run = True
                else:
                    fragment_chunk_problem.fragments.write_to_path(
                        aj.fragments)
                ''' Now the align job can be put on the queue '''
                JobPool().enqueue_job(aj)
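
The even distribution above hinges on fragments.divide_to_equal_chunks(chunks), which is not shown in these examples. As a purely illustrative stand-in, here is a toy version over plain lists that deals items round-robin so chunk sizes differ by at most one, which is the contract the loop above relies on:

def divide_to_equal_chunks_demo(items, chunks):
    """Toy illustration of the assumed divide_to_equal_chunks contract."""
    buckets = [[] for _ in range(chunks)]
    for i, item in enumerate(items):
        buckets[i % chunks].append(item)  # round-robin keeps sizes even
    return buckets


print(divide_to_equal_chunks_demo(list("ABCDEFG"), 3))
# [['A', 'D', 'G'], ['B', 'E'], ['C', 'F']]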
Example #5
    def perform(self):
        pp = self.placement_problem
        fullExtendedAlignments = self.merge_subalignments()

        for i in range(0, self.root_problem.fragment_chunks):
            fullExtendedAlignment = fullExtendedAlignments[i]
            # Split the backbone alignment and query sequences
            # into separate files
            queryExtendedAlignment = \
                fullExtendedAlignment.get_fragments_readonly_alignment()
            baseAlignment = fullExtendedAlignment.get_base_readonly_alignment()
            pj = pp.jobs[get_placement_job_name(i)]
            assert isinstance(pj, PplacerJob)
            if queryExtendedAlignment.is_empty():
                pj.fake_run = True

            # Write out the extended alignments, split into query and full-
            # length for pplacer
            queryExtendedAlignment.write_to_path(pj.extended_alignment_file)
            baseAlignment.write_to_path(pj.backbone_alignment_file)

            # But keep the extended alignment on everything
            pj.set_attribute("full_extended_alignment_object",
                             fullExtendedAlignment)

            JobPool().enqueue_job(pj)
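
As in Example #4 (and Examples #10 and #12 below), an empty query alignment does not skip the job: it is marked fake_run = True and still enqueued, so downstream joins fire as usual. A tiny sketch of that guard as a reusable helper (the helper name is mine; the attributes and calls are those used in the examples):

from sepp.scheduler import JobPool  # assumed import path


def enqueue_or_fake(job, alignment):
    """Enqueue the job; make it a no-op when there is nothing to process."""
    if alignment is None or alignment.is_empty():
        job.fake_run = True  # job is scheduled but does no real work
    JobPool().enqueue_job(job)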
Example #6
    def testCpuCount(self):
        # Just to make different test cases independent of each other.
        config._options_singelton = None
        # Disable main config path for this test
        config.main_config_path = self.fp_config
        JobPool().terminate()
        JobPool().__init__(7)
        sys.argv = [sys.argv[0], "-x", "7"]

        assert options().cpu == 7, "Commandline option -x not read properly"

        # clean up after test:
        # 1) the JobPool CPU counts needs to be reset to the default
        # 2) the command line arguments must be restored
        JobPool().terminate()
        JobPool().__init__(cpu_count())
        sys.argv = [sys.argv[0], "-x", str(cpu_count())]
        config._options_singelton = None
        options()
Example #7
 def perform(self):
     '''
     Aggregate fragments from the tips up to the SUMMERIZE_LEVEL problem,
     and enqueue a summarize job
     '''
     print("Process [%s]: Join_ApplyModel_Summarize joining %s" %
           (os.getpid(), self.summarylevel_problem),
           file=sys.stderr)
     resultsPerTipSubproblem = []
     for tip in self.summarylevel_problem.iter_leaves():
         resultsPerTipSubproblem.append(
             tip.get_job_result_by_name("applymodel"))
     self.summarylevel_problem.jobs[
         "summarize"].resultsPerTipSubproblem = resultsPerTipSubproblem
     JobPool().enqueue_job(self.summarylevel_problem.jobs["summarize"])
Example #8
    def testSuccess(self):
        find_job = TestExternalJob()
        find_job.pattern = "."
        find_job.options = "-name *.py"
        JobPool().enqueue_job(find_job)

        JobPool().wait_for_all_jobs()

        res0 = find_job.result.split('\n')[0]

        assert res0 != ""

        find_job = TestExternalJob()
        find_job.pattern = ".."
        find_job.options = "-name %s" % res0.replace("./test", "*")
        JobPool().enqueue_job(find_job)

        JobPool().wait_for_all_jobs()

        res1 = find_job.result.split('\n')[0]

        assert res1 != ""

        assert res1.replace("../unittest/", "./") == res0
Example #9
    def testSuccess(self):
        find_job = TestExternalJob()
        find_job.pattern = "."
        find_job.options = "-name test*.py"
        JobPool().enqueue_job(find_job)

        JobPool().wait_for_all_jobs()

        res0 = find_job.result.split('\n')[0]

        assert res0 != ""

        find_job = TestExternalJob()
        find_job.pattern = ".."
        find_job.options = "-name %s" % res0.split('/')[-1]
        JobPool().enqueue_job(find_job)

        JobPool().wait_for_all_jobs()

        res1 = find_job.result.split('\n')[0]

        assert res1 != ""

        assert res1.endswith(res0[2:])
Example #10
    def perform(self):
        pp = self.placement_problem
        fullExtendedAlignments = self.merge_subalignments()

        for i in range(0, self.root_problem.fragment_chunks):
            fullExtendedAlignment = fullExtendedAlignments[i]
            # Split the backbone alignment and query sequences into
            # separate files
            queryExtendedAlignment = \
                fullExtendedAlignment.get_fragments_readonly_alignment()
            base_alignment = fullExtendedAlignment.\
                get_base_readonly_alignment()
            pj = pp.jobs[get_placement_job_name(i)]

            if queryExtendedAlignment.is_empty():
                pj.fake_run = True

            if self.placer == "pplacer":
                assert isinstance(pj, PplacerJob)

                # Write out the extended alignments, split into query and
                # full-length for pplacer
                queryExtendedAlignment.write_to_path(
                    pj.extended_alignment_file)
                base_alignment.write_to_path(pj.backbone_alignment_file)

            elif self.placer == "epa":
                # assert isinstance(pj, EPAJob)
                raise ValueError("EPA Currently not supported")

                # Write out the extended alignments in phylip for EPA
                # fullExtendedAlignment.write_to_path(
                #    pj.extended_alignment_file, schema="PHYLIP")

            # keep the extended alignment on everything
            # pj.set_attribute("full_extended_alignment_object",
            # fullExtendedAlignment)

            # TODO: Removed this, as it can cause unexpected lockups
            with open(pj.full_extended_alignment_file, 'wb') as output:
                pickle.dump(fullExtendedAlignment, output)

            # Enqueue the placement job
            JobPool().enqueue_job(pj)
Example #11
    def perform(self):
        print("Process [%s]: Join_BuildModel_SearchFragment joining %s" %
              (os.getpid(), self.grandparent_problem),
              file=sys.stderr)
        '''
        1 - Start from the grandparent problem's fragments.
        2 - Based on results from the joined search operations, figure out,
            for each grandparent fragment, whether it is closer to parent1
            or parent2 (the grandparent's children), and divide the
            fragments accordingly
        3 - Set the fragments attribute of parent1 and parent2 based on the
            preceding calculation
        4 - For each grandchild of the grandparent problem,
            4-1 Set the fragments attribute of its searchfragment job to the
                fragments of its parent (set in step 3)
            4-2 Set the model attribute of its searchfragment job to the
                model computed in its buildmodel job
            4-3 Enqueue its searchfragment job

        The above procedure is equivalent to figuring out which child HMM
        model a fragment is closer to, and classifying it accordingly.
        '''
        fragments = self.grandparent_problem.fragments
        frags_range = list(range(0, len(fragments)))
        c1_res = self.grandparent_problem.children[0].get_job_result_by_name(
            "searchfragment")
        c2_res = self.grandparent_problem.children[1].get_job_result_by_name(
            "searchfragment")
        model_search = [c1_res[i] - c2_res[i] for i in frags_range]
        self.grandparent_problem.children[0].fragments = [
            fragments[i] for i in frags_range if model_search[i] < 0
        ]
        self.grandparent_problem.children[1].fragments = [
            fragments[i] for i in frags_range if model_search[i] >= 0
        ]
        for l1 in self.grandparent_problem.children:
            fragments = l1.fragments
            for l2 in l1.children:
                l2j = l2.jobs["searchfragment"]
                l2j.model = l2.get_job_result_by_name("buildmodel")
                l2j.fragments = fragments
                JobPool().enqueue_job(l2j)
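
Steps 2 and 3 of the docstring boil down to comparing per-fragment scores from the two children and splitting on the sign of the difference. A self-contained numeric illustration with invented scores (note that in the code above a negative difference sends the fragment to children[0], which is consistent with lower-is-better scores such as e-values; that interpretation is an assumption, not stated in the snippet):

c1_res = [1e-9, 0.5, 0.02]  # invented searchfragment scores, child 0
c2_res = [0.3, 1e-6, 0.02]  # invented searchfragment scores, child 1
frags_range = list(range(0, len(c1_res)))
model_search = [c1_res[i] - c2_res[i] for i in frags_range]
child0 = [i for i in frags_range if model_search[i] < 0]   # -> [0]
child1 = [i for i in frags_range if model_search[i] >= 0]  # -> [1, 2]
print(child0, child1)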
Example #12
    def perform(self):
        pp = self.placement_problem
        fullExtendedAlignment = self.merge_subalignments()
        pj = pp.jobs["placer"]

        # Split the backbone alignment and query sequences into
        # separate files
        queryExtendedAlignment = \
            fullExtendedAlignment.get_fragments_readonly_alignment()
        baseAlignment = fullExtendedAlignment.get_base_readonly_alignment()

        # Check for empty fragment files
        if queryExtendedAlignment.is_empty():
            pj.fake_run = True

        elif self.placer == "pplacer":
            assert isinstance(pj, PplacerJob)

            # Write out the extended alignments, split into query and
            # full-length for pplacer
            queryExtendedAlignment.write_to_path(pj.extended_alignment_file)
            baseAlignment.write_to_path(pj.backbone_alignment_file)

        elif self.placer == "epa":
            assert isinstance(pj, EPAJob)

            # Write out the extended alignments in phylip for EPA
            fullExtendedAlignment.write_to_path(pj.extended_alignment_file,
                                                schema="PHYLIP")

        # keep the extended alignment on everything
        # pj.set_attribute("full_extended_alignment_object",
        #                  fullExtendedAlignment)

        # TODO: Removed this, as it can cause unexpected lockups
        with open(pj.full_extended_alignment_file, 'wb') as output:
            pickle.dump(fullExtendedAlignment, output)

        # Enqueue the placement job
        JobPool().enqueue_job(pj)
Example #13
    def perform(self):
        '''
        First print out a summary of everything up to here.
        Then update applymodel jobs with the correct fragments and model,
        and then enqueue them.
        '''
        print("Process [%s]: Join_tip_searchfragment joining %s" %
              (os.getpid(), self.root_problem),
              file=sys.stderr)

        def print_fragments(problem):
            print("level " + str(problem.level),
                  str(problem.get_job_result_by_name("buildmodel")),
                  problem.fragments)
            for c in problem.children:
                print_fragments(c)

        print_fragments(self.root_problem)

        for p in self.root_problem.iter_leaves():
            j = p.jobs["applymodel"]
            j.fragments = p.jobs["searchfragment"].fragments
            j.model = p.jobs["searchfragment"].model
            JobPool().enqueue_job(j)
Example #14
    def testNoPipe(self):
        find_job = TestExternalJob(pipe=1)
        find_job.pattern = "."
        find_job.options = "-name *.py"
        JobPool().enqueue_job(find_job)
        JobPool().wait_for_all_jobs()
        res0 = find_job.result.split('\n')[0]
        assert res0 != ""

        find_job = TestExternalJob(pipe=1)
        find_job.pattern = "somerandomdirectorywewillneverhavehere_ordowe"
        ''' Let's ignore this error in subsequent test cases. '''
        find_job.ignore_error = True
        try:
            JobPool().enqueue_job(find_job)
            assert JobPool().wait_for_all_jobs() is False
            JobPool().get_asynch_result_object(find_job).get()
        except JobError as e:
            assert str(e).find("No such file or directory") != -1, \
                "The error we expected is no such file or directory"

        assert JobPool().get_asynch_result_object(find_job).successful() is \
            False, "We expected the job to fail"
Example #15
 def enqueue_buildmodel_job(problem):
     if problem.parent is not None:
         JobPool().enqueue_job(problem.jobs["buildmodel"])
     for child in problem.children:
         enqueue_buildmodel_job(child)
Example #16
    Note that this could also have been achieved using a join that has
    only one job.'''
    for c in root_problem.children:

        def enq_job_searchfragment(result, next_job):
            next_job.model = result
            next_job.fragments = root_problem.fragments
            JobPool().enqueue_job(next_job)

        c.jobs["buildmodel"].add_call_Back(lambda result, next_job=c.jobs[
            "searchfragment"]: enq_job_searchfragment(result, next_job))


s = 0
lock = Lock()
if __name__ == '__main__':
    pool = JobPool(2)
    '''build the problem structure'''
    root_problem = build_subproblems()
    '''build the DAG of jobs'''
    build_job_dag(root_problem)
    '''All buildmodel jobs are ready to be started (i.e. no dependency).
    Queue them up. Once they run, they will automatically enqueue the rest of the
    DAG through joins and callbacks '''
    def enqueue_buildmodel_job(problem):
        if problem.parent is not None:
            JobPool().enqueue_job(problem.jobs["buildmodel"])
        for child in problem.children:
            enqueue_buildmodel_job(child)

    enqueue_buildmodel_job(root_problem)
    '''Wait for all jobs to finish'''
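
The note at the top of this example says the same wiring could be achieved with a join that has only one job. As a sketch of what that might look like, assuming a Join base class with a perform() hook like the Join_* classes in Examples #7, #11 and #13 (the constructor and how the join gets attached to the buildmodel job are assumptions; only the perform() body mirrors the callback above):

class Join_BuildModel_SearchFragment_Single(Join):  # base class assumed

    def __init__(self, problem):
        Join.__init__(self)  # base-class signature assumed
        self.problem = problem

    def perform(self):
        # Same body as enq_job_searchfragment above: feed the built model
        # and the root problem's fragments into the search job, then
        # enqueue it.
        next_job = self.problem.jobs["searchfragment"]
        next_job.model = self.problem.get_job_result_by_name("buildmodel")
        next_job.fragments = root_problem.fragments
        JobPool().enqueue_job(next_job)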
Example #17
def run():
    global pool
    pool1 = JobPool(2)
    pool2 = JobPool()
    if pool1 != pool2:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")
    try:
        JobPool(4)
    except Exception as e:
        print(("As expected, making a new JobPool with a"
               " different cpu count failed: %s") % e)

    pool = JobPool()
    jobs = []
    for j in range(1, 20):
        job = TestJob(str(j))
        jobs.append(job)
        pool.enqueue_job(job)

    sample_job = pool.get_asynch_result_object(jobs[3])

    # pool.terminate()

    pool.wait_for_all_jobs(ignore_error=True)

    # Test one of the jobs, to see if it is successful
    if sample_job.ready() and sample_job.successful():
        assert(jobs[3].result_set is True)
    else:
        assert(jobs[3].result_set is False)

    errors = pool.get_all_job_errors()
    # print("Following job errors were raised:", errors)

    try:
        pool.wait_for_all_jobs(ignore_error=False)
    except Exception as e:
        print("Seems we have some jobs that failed (expected): ", e)

    errs = [pool.get_job_error(job) for job in pool.get_failed_jobs()]

    # print(errs)

    assert len(errs) == len(errors), \
        "Number of errors from failed jobs: %d. Number of errors: %d" % \
        (len(errs), len(errors))
    assert False not in [x in errors for x in errs]

    # print [job.state for job in jobs]
    # print("Number of started jobs - number of printed results:", s)
    # print("Number of failed jobs:", len(errors))
    assert s == len(errors), \
        "Parallelization Error, what happened to the rest?"
Example #18
 def enq_job_searchfragment(result, next_job):
     next_job.model = result
     next_job.fragments = root_problem.fragments
     JobPool().enqueue_job(next_job)
Example #19
        return h
            
s = 0    
lock = Lock()
if __name__ == '__main__':
    global pool
    pool1 = JobPool(2)
    pool2 = JobPool()
    if pool1 != pool2:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")
    try:
        pool3 = JobPool(4)
    except Exception as e:
        print("As expected, making a new JobPool with a different"
              " cpu count failed: %s" % e)
        
    pool = JobPool()
    jobs = []
    for j in range(1, 20):
        job = TestJob(str(j))
        jobs.append(job)
        pool.enqueue_job(job)

    sample_job = pool.get_asynch_result_object(jobs[3])

    # pool.terminate()

    pool.wait_for_all_jobs(ignore_error=True)

    # Test one of the jobs, to see if it is successful
    if sample_job.ready() and sample_job.successful():
Example #20
 def add_a_child(parent):
     print("Adding a child job for %s" % parent, file=sys.stderr)
     JobPool().enqueue_job(TestJob("%s.child" % parent))
Example #21
        step = random()
        if step < 0.1:
            raise Exception("Some (truly) random error occurred in job %s." %
                            self.jobname)
        for i in range(0, 100):
            h += step * i
            time.sleep(step / 100)
        # self.state = step
        return h


s = 0
lock = Lock()
if __name__ == '__main__':
    global pool
    pool1 = JobPool(2)
    pool2 = JobPool()
    if pool1 != pool2:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")
    try:
        pool3 = JobPool(4)
    except Exception as e:
        print("As expected, making a new JobPool with a different"
              " cpu count failed: %s" % e)

    pool = JobPool()
    jobs = []
    for j in range(1, 20):
        job = TestJob(str(j))
        jobs.append(job)
        pool.enqueue_job(job)
Example #22
 def enq_job_searchfragment(result, search_job):
     search_job.hmmmodel = result
     JobPool().enqueue_job(search_job)
Example #23
def run():
    global pool
    pool1 = JobPool(2)
    pool2 = JobPool()
    if pool1 != pool2:
        raise Exception("hmmm, I thought JobPool is 'Singleton'")
    try:
        JobPool(4)
    except Exception as e:
        print(("As expected, making a new JobPool with a"
               " different cpu count failed: %s") % e)

    pool = JobPool()
    jobs = []
    for j in range(1, 20):
        job = TestJob(str(j))
        jobs.append(job)
        pool.enqueue_job(job)

    sample_job = pool.get_asynch_result_object(jobs[3])

    # pool.terminate()

    pool.wait_for_all_jobs(ignore_error=True)

    # Test one of the jobs, to see if it is successful
    if sample_job.ready() and sample_job.successful():
        assert (jobs[3].result_set is True)
    else:
        assert (jobs[3].result_set is False)

    errors = pool.get_all_job_errors()
    # print("Following job errors were raised:", errors)

    try:
        pool.wait_for_all_jobs(ignore_error=False)
    except Exception as e:
        print("Seems we have some jobs that failed (expected): ", e)

    errs = [pool.get_job_error(job) for job in pool.get_failed_jobs()]

    # print(errs)

    assert len(errs) == len(errors), \
        "Number of errors from failed jobs: %d. Number of errors: %d" % \
        (len(errs), len(errors))
    assert False not in [x in errors for x in errs]

    # print [job.state for job in jobs]
    # print("Number of started jobs - number of printed results:", s)
    # print("Number of failed jobs:", len(errors))
    assert s == len(errors), \
        "Parallelization Error, what happened to the rest?"
Example #24
 def tearDown(self):
     # clean up JobPool for other unit tests
     JobPool().terminate()
     JobPool().__init__(cpu_count())
Example #25
 def enqueue_firstlevel_job(self):
     for p in self.root_problem.children:
         for ap in p.children:
             JobPool().enqueue_job(ap.jobs["hmmbuild"])
Example #26
 def add_a_child(parent):
     # print("Adding a child job for %s" % (parent), file=sys.stderr)
     JobPool().enqueue_job(TestJob("%s.child" % parent))