Example #1
    def changePermissions(self, path, itemNumber):
        # Example: tempAHc28U/step0/centroid/pw/r5d2_r5d1/tempopal_waXAP/out.fasta
        # First file:
        finalItemNumber = len(path.split("/"))
        startItemNumber = finalItemNumber - itemNumber
        currentItem = 0

        # MESSENGER.send_info("[JMAbuin] The start item is " + str(startItemNumber))
        # MESSENGER.send_info("[JMAbuin] The final item is " + str(finalItemNumber))

        basePath = ""

        for itemPath in path.split("/"):

            if (itemPath):

                basePath = basePath + "/" + itemPath

                if ((currentItem < finalItemNumber) and (currentItem >= startItemNumber) and (os.path.isfile(basePath) or os.path.isdir(basePath))):
                    # MESSENGER.send_info("[JMAbuin] Changing permissions of " + basePath)
                    try:
                        os.chmod(basePath, stat.S_IRWXO | stat.S_IRWXG | stat.S_IRWXU)
                    except Exception as e:
                        MESSENGER.send_error("[JMAbuin] ERROR Changing permissions: "+e.message)

            currentItem += 1
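
A minimal standalone sketch of the same walk, assuming a purely illustrative path and item count; it only prints the prefixes that changePermissions would chmod and does not touch the filesystem:

def last_n_prefixes(path, item_number):
    # Mirror of the selection logic above: rebuild each "/"-joined prefix and
    # keep the ones covering the last item_number components of the path.
    # Rebuilt prefixes start with "/" just as in the method above.
    final_item_number = len(path.split("/"))
    start_item_number = final_item_number - item_number
    prefixes, base = [], ""
    for current, item in enumerate(path.split("/")):
        if item:
            base = base + "/" + item
            if start_item_number <= current < final_item_number:
                prefixes.append(base)
    return prefixes

# Example path taken from the comment above; 7 selects the trailing components.
for prefix in last_n_prefixes("tempAHc28U/step0/centroid/pw/r5d2_r5d1/tempopal_waXAP/out.fasta", 7):
    print(prefix)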
Example #2
    def create_job(self, alignment, guide_tree=None, **kwargs):
        job_id = kwargs.get('context_str', '') + '_mafft'
        if alignment.get_num_taxa() == 0:
            return FakeJob(alignment, context_str=job_id)
        new_alignment = alignment.unaligned()
        if new_alignment.get_num_taxa() < 2:
            return FakeJob(new_alignment, context_str=job_id)
        scratch_dir, seqfn, alignedfn = self._prepare_input(
            new_alignment, **kwargs)

        # Start the invocation with the MAFFT executable (same on every platform).
        invoc = [self.exe]
        if len(alignment) <= 200 and new_alignment.max_sequence_length() < 50000:
            invoc.extend(['--localpair', '--maxiterate', '1000'])
        if '--ep' not in self.user_opts:
            invoc.extend(['--ep', '0.123'])
        invoc.extend(['--quiet'])
        invoc.extend(self.user_opts)

        if get_sparkcontext():
            #import multiprocessing
            #available_cpus = multiprocessing.cpu_count()
            MESSENGER.send_info("Using spark to launch MAFFT")
            invoc.extend(['--thread', str(kwargs.get('num_cpus_spark', 1))])
            #invoc.extend(['--thread', str(kwargs.get('num_cpus_spark', available_cpus))])
        else:
            import multiprocessing
            available_cpus = multiprocessing.cpu_count()
            MESSENGER.send_info("NOT using spark to launch MAFFT")
            invoc.extend(
                ['--thread',
                 str(kwargs.get('num_cpus', available_cpus))])
        invoc.append(seqfn)

        # The MAFFT job creation is slightly different from the other
        #   aligners because we redirect and read standard output.

        return self._finish_standard_job(alignedfn=alignedfn,
                                         datatype=alignment.datatype,
                                         invoc=invoc,
                                         scratch_dir=scratch_dir,
                                         job_id=job_id,
                                         delete_temps=kwargs.get(
                                             'delete_temps',
                                             self.delete_temps),
                                         stdout=alignedfn)
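
For orientation, the finished MAFFT invocation for a small, non-Spark run would look roughly like the list below; the executable path, thread count, and file name are assumptions, not values from the source:

# Hypothetical shape of invoc just before _finish_standard_job is called:
invoc = [
    "/usr/local/bin/mafft",                  # self.exe (assumed install path)
    "--localpair", "--maxiterate", "1000",   # small-input branch (<= 200 seqs, < 50000 bp)
    "--ep", "0.123",                         # added only when the user did not set --ep
    "--quiet",
    "--thread", "4",                         # num_cpus (assumed CPU count)
    "input.fasta",                           # seqfn returned by _prepare_input
]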
Example #3
def spark_align(joblist, num_partitions):
    _LOG.debug("SPARK alignment starting")
    sc = get_sparkcontext()
    global input_data
    lightjoblist = list()
    # dir_path = os.path.dirname(os.path.realpath(__file__))
    dir_path = os.getcwd()
    for job in joblist:
        pa = job.start()

        # Create a list of pairs (job, data)
        #MESSENGER.send_info(pa[0])
        lightjoblist.append(
            (LightJobForProcess(pa[0], pa[1],
                                os.environ), input_data[pa[0][-1]].items()))
    # Parallelize the list of pairs (job, data)
    #rdd_joblist = sc.parallelize(lightjoblist, len(lightjoblist))

    MESSENGER.send_info("[JMAbuin] The number of light jobs is: " +
                        str(len(lightjoblist)))

    if (num_partitions == 0):
        rdd_joblist = sc.parallelize(lightjoblist, len(lightjoblist))
    else:
        rdd_joblist = sc.parallelize(lightjoblist, num_partitions)
    # These two lines are for testing purposes only
    # for j in lightjoblist:
    #   do_align(j)

    # For each pair, do alignment
    # as output, get an RDD with pairs (out_filename, out_data)
    out = rdd_joblist.map(do_align)

    # Collect out and write the data to corresponding files in master's disk
    results = out.collect()
    for res in results:
        with open(res[0], mode='w+b') as f:
            f.writelines(res[1])

    # Shut down Spark
    _LOG.debug("SPARK alignment finished")
    _LOG.debug("Deactivating Spark")
    setSpark(False)

    # Finish the jobs
    for job in joblist:
        job.results = job.result_processor()
        job.finished_event.set()
        job.get_results()
        job.postprocess()
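
spark_align relies on a do_align mapper and a LightJobForProcess wrapper that are defined elsewhere; the sketch below only illustrates the contract assumed here (the run method and its return value are hypothetical, not the project's API):

def do_align(pair):
    # Each RDD element is (LightJobForProcess, input_items). The mapper runs
    # the external aligner on the executor and returns (out_filename, out_data)
    # so that the driver can write the result on the master's disk.
    light_job, data_items = pair
    out_filename, out_data = light_job.run(data_items)  # hypothetical method
    return (out_filename, out_data)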
Example #4
    def run_prelim_step1(self):
        self.start_time = time.time()
        self.last_improvement_time = self.start_time
        num_non_update_iter = 0
        configuration = self.configuration()
        # Here we check if the max_subproblem_frac is more stringent than max_subproblem_size
        frac_max = int(math.ceil(self.max_subproblem_frac * self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
            configuration['max_subproblem_size']))
        if configuration['max_subproblem_size'] >= self.tree.n_leaves:
            MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the PASTA algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using PASTA) please adjust your settings for the max subproblem and
    try running PASTA again. If you intended to use PASTA to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                         self.tree.n_leaves))
        if configuration['max_subproblem_size'] == 1:
            MESSENGER.send_error(
                ''' You have specified a max subproblem size of 1. PASTA requires a max subproblem size of at least 2.  ''')
            sys.exit(1)
        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps
        return configuration, delete_iteration_temps
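
A quick worked example of the subproblem-size check above, with purely illustrative numbers:

import math

n_leaves = 1000
max_subproblem_frac = 0.5
max_subproblem_size = 200
frac_max = int(math.ceil(max_subproblem_frac * n_leaves))   # 500
if frac_max > max_subproblem_size:
    max_subproblem_size = frac_max
print(max_subproblem_size)   # 500: the fraction-based bound wins when it is larger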
Example #5
    def run(self, tmp_dir_par, pasta_products=None):
        assert(os.path.exists(tmp_dir_par))

        self._reset_current_run_settings()
        self._reset_jobs()

        self.start_time = time.time()
        self.last_improvement_time = self.start_time

        num_non_update_iter = 0

        configuration = self.configuration()
        # Here we check if the max_subproblem_frac is more stringent than max_subproblem_size
        frac_max = int(math.ceil(self.max_subproblem_frac*self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
                configuration['max_subproblem_size']))
        if configuration['max_subproblem_size'] >= self.tree.n_leaves:
            MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the PASTA algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using PASTA) please adjust your settings for the max subproblem and
    try running PASTA again. If you intended to use PASTA to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                       self.tree.n_leaves))
        if configuration['max_subproblem_size'] == 1:
             MESSENGER.send_error(''' You have specified a max subproblem size of 1. PASTA requires a max subproblem size of at least 2.  ''')
             sys.exit(1)

        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps

        while self._keep_iterating():
            record_timestamp(os.path.join(tmp_dir_par, 'start_pastaiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            break_strategy_index = 0
            this_iter_score_improved = False

            while True:
                break_strategy = self._get_break_strategy(break_strategy_index)
                if not bool(break_strategy):
                    break
                context_str = "iter%d-%s" % (self.current_iteration, break_strategy)
                # create a subdirectory for this iteration/break_strategy
                curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par, break_strategy)
                curr_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(curr_tmp_dir_par)
                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_align_timestamp.txt'))
                # Align (with decomposition...)
                self.status('Step %d. Realigning with decomposition strategy set to %s' % (self.current_iteration, break_strategy))
                if self.killed:
                    raise RuntimeError("PASTA Job killed")
                tree_for_aligner = self.get_tree_copy()
                aligner = PASTAAlignerJob(multilocus_dataset=self.multilocus_dataset,
                                         pasta_team=self.pasta_team,
                                         tree=tree_for_aligner,
                                         tmp_base_dir=curr_tmp_dir_par,
                                         reset_recursion_index=True,
                                         skip_merge=self.pastamerge,
                                         **configuration)
                self.pasta_aligner_job = aligner
                aligner.launch_alignment(break_strategy=break_strategy,
                                         context_str=context_str)                
                if self.pastamerge:
                    _LOG.debug("Build PASTA merge jobs")
                    subsets_tree = self.build_subsets_tree(curr_tmp_dir_par)
                    if len(self.pasta_team.subsets.values()) == 1:
                        # can happen if there are no decompositions
                        for job in self.pasta_team.alignmentjobs:
                            jobq.put(job)
                        new_multilocus_dataset = list(self.pasta_team.subsets.values())[0].get_results()
                    else:
                        pariwise_tmp_dir_par = os.path.join(curr_tmp_dir_par, "pw")
                        pariwise_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(pariwise_tmp_dir_par)    
                        pmj = PASTAMergerJob(multilocus_dataset=self.multilocus_dataset,
                                             pasta_team=self.pasta_team,
                                             tree=subsets_tree,
                                             tmp_base_dir=pariwise_tmp_dir_par,
                                             reset_recursion_index=True,   
                                             #delete_temps2=False,                                      
                                             **configuration)
                                                
                        pmj.launch_alignment(context_str=context_str)
                        
                        # Start alignment jobs
                        for job in self.pasta_team.alignmentjobs:
                            jobq.put(job)
                            
                            
                        new_multilocus_dataset = pmj.get_results()
                        del pmj  
                    
                    self.pasta_team.alignmentjobs = []
                    self.pasta_team.subsets = {}                                                                  
                else:          
                    new_multilocus_dataset = aligner.get_results()
                
                _LOG.debug("Alignment obtained. Preparing for tree.")
                self.pasta_aligner_job = None
                del aligner

                record_timestamp(os.path.join(curr_tmp_dir_par, 'start_treeinference_timestamp.txt'))
                # Tree inference
                if self.start_tree_search_from_current:
                    start_from = self.tree
                else:
                    start_from = None
                self.status('Step %d. Alignment obtained. Tree inference beginning...' % (self.current_iteration))
                if self.killed:
                    raise RuntimeError("PASTA Job killed")                             
            
                tbj = self.pasta_team.tree_estimator.create_job(new_multilocus_dataset,
                                                               starting_tree=start_from,
                                                               num_cpus=self.num_cpus,
                                                               context_str=context_str + " tree",
                                                               tmp_dir_par=curr_tmp_dir_par,
                                                               delete_temps=delete_iteration_temps,
                                                               pasta_products=pasta_products,
                                                               step_num=self.current_iteration,
                                                               mask_gappy_sites = self.mask_gappy_sites)
                prev_curr_align = self.curr_iter_align_tmp_filename
                prev_curr_tree = self.curr_iter_tree_tmp_filename
                self.curr_iter_align_tmp_filename = pasta_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_SEQ_ALIGNMENT_TAG, allow_existing=True)
                self.curr_iter_tree_tmp_filename = pasta_products.get_abs_path_for_iter_output(self.current_iteration, TEMP_TREE_TAG, allow_existing=True)

                self.tree_build_job = tbj
                jobq.put(tbj)
                new_score, new_tree_str = tbj.get_results()
                self.tree_build_job = None
                del tbj
                if self.killed:
                    raise RuntimeError("PASTA Job killed")

                record_timestamp(os.path.join(curr_tmp_dir_par, 'end_treeinference_timestamp.txt'))
                curr_timestamp = time.time()
                accept_iteration = False

                if self.score is None:
                    self.score = new_score

                if self.best_score is None or new_score > self.best_score:
                    self.store_optimum_results(new_multilocus_dataset,
                            new_tree_str,
                            new_score,
                            curr_timestamp)
                    this_iter_score_improved = True
                    accept_iteration = True

                if self._get_accept_mode(new_score=new_score, break_strategy_index=break_strategy_index) == AcceptMode.BLIND_MODE:
                    if self.blind_mode_is_final:
                        self.is_stuck_in_blind = True
                        if self.switch_to_blind_timestamp is None:
                            if self._blindmode_trigger:
                                _LOG.debug("Blind runmode trigger = %s" % self._blindmode_trigger)
                            self.switch_to_blind_iter = self.current_iteration
                            self.switch_to_blind_timestamp = curr_timestamp
                    accept_iteration = True

                if accept_iteration:
                    self.score = new_score
                    self.multilocus_dataset = new_multilocus_dataset
                    self.tree_str = new_tree_str
                    if this_iter_score_improved:
                        self.status('realignment accepted and score improved.')
                    else:
                        self.status('realignment accepted despite the score not improving.')
                    # we do not want to continue to try different breaking strategies for this iteration so we break
                    self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
                    break
                else:
                    self.status('realignment NOT accepted.')
                    self.curr_iter_align_tmp_filename = prev_curr_align
                    self.curr_iter_tree_tmp_filename = prev_curr_tree 

                break_strategy_index += 1

                # self.status('current score: %s, best score: %s' % (self.score, self.best_score) )
                
            if not this_iter_score_improved:
                self.num_iter_since_imp += 1
            self.current_iteration += 1

        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(os.path.join(tmp_dir_par, 'end_pastaiter_timestamp.txt'))

        ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None
Example #6
    def run(self, tmp_dir_par, pasta_products=None):
        assert (os.path.exists(tmp_dir_par))

        self._reset_current_run_settings()
        self._reset_jobs()

        self.start_time = time.time()
        self.last_improvement_time = self.start_time

        num_non_update_iter = 0

        configuration = self.configuration()
        # Here we check if the max_subproblem_frac is more stringent than max_subproblem_size
        frac_max = int(math.ceil(self.max_subproblem_frac *
                                 self.tree.n_leaves))
        if frac_max > self.max_subproblem_size:
            configuration['max_subproblem_size'] = frac_max
        MESSENGER.send_info('Max subproblem set to {0}'.format(
            configuration['max_subproblem_size']))
        if configuration['max_subproblem_size'] >= self.tree.n_leaves:
            MESSENGER.send_warning('''\n
WARNING: you have specified a max subproblem ({0}) that is equal to or greater
    than the number of taxa ({1}). Thus, the PASTA algorithm will not be invoked
    under the current configuration (i.e., no tree decomposition will occur).
    If you did not intend for this behavior (which you probably did not since
    you are using PASTA) please adjust your settings for the max subproblem and
    try running PASTA again. If you intended to use PASTA to align your data with
    the specified aligner tool *without* any decomposition, you can ignore this
    message.\n'''.format(configuration['max_subproblem_size'],
                         self.tree.n_leaves))
        if configuration['max_subproblem_size'] == 1:
            MESSENGER.send_error(
                ''' You have specified a max subproblem size of 1. PASTA requires a max subproblem size of at least 2.  '''
            )
            sys.exit(1)

        delete_iteration_temps = not self.keep_iteration_temporaries
        delete_realignment_temps = delete_iteration_temps or (
            not self.keep_realignment_temporaries)
        configuration['delete_temps'] = delete_realignment_temps

        while self._keep_iterating():
            record_timestamp(
                os.path.join(tmp_dir_par, 'start_pastaiter_timestamp.txt'))

            # create a subdirectory for this iteration
            curr_iter_tmp_dir_par = os.path.join(
                tmp_dir_par, 'step' + str(self.current_iteration))
            curr_iter_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(
                curr_iter_tmp_dir_par)
            _LOG.debug('directory %s created' % curr_iter_tmp_dir_par)
            break_strategy_index = 0
            this_iter_score_improved = False

            while True:
                break_strategy = self._get_break_strategy(break_strategy_index)
                if not bool(break_strategy):
                    break
                context_str = "iter%d-%s" % (self.current_iteration,
                                             break_strategy)
                # create a subdirectory for this iteration/break_strategy
                curr_tmp_dir_par = os.path.join(curr_iter_tmp_dir_par,
                                                break_strategy)
                curr_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(
                    curr_tmp_dir_par)
                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'start_align_timestamp.txt'))
                # Align (with decomposition...)
                self.status(
                    'Step %d. Realigning with decomposition strategy set to %s'
                    % (self.current_iteration, break_strategy))
                if self.killed:
                    raise RuntimeError("PASTA Job killed")
                tree_for_aligner = self.get_tree_copy()
                aligner = PASTAAlignerJob(
                    multilocus_dataset=self.multilocus_dataset,
                    pasta_team=self.pasta_team,
                    tree=tree_for_aligner,
                    tmp_base_dir=curr_tmp_dir_par,
                    reset_recursion_index=True,
                    skip_merge=self.pastamerge,
                    **configuration)
                self.pasta_aligner_job = aligner
                aligner.launch_alignment(break_strategy=break_strategy,
                                         context_str=context_str)
                if self.pastamerge:
                    _LOG.debug("Build PASTA merge jobs")
                    subsets_tree = self.build_subsets_tree(
                        curr_tmp_dir_par, self.build_MST)
                    if len(self.pasta_team.subsets) == 1:
                        # can happen if there are no decompositions
                        for job in self.pasta_team.alignmentjobs:
                            jobq.put(job)
                        new_multilocus_dataset = list(
                            self.pasta_team.subsets.values())[0].get_results()
                    else:
                        pariwise_tmp_dir_par = os.path.join(
                            curr_tmp_dir_par, "pw")
                        pariwise_tmp_dir_par = self.pasta_team.temp_fs.create_subdir(
                            pariwise_tmp_dir_par)
                        pmj = PASTAMergerJob(
                            multilocus_dataset=self.multilocus_dataset,
                            pasta_team=self.pasta_team,
                            tree=subsets_tree,
                            tmp_base_dir=pariwise_tmp_dir_par,
                            reset_recursion_index=True,
                            #delete_temps2=False,
                            **configuration)

                        pmj.launch_alignment(context_str=context_str)

                        # Start alignment jobs
                        for job in self.pasta_team.alignmentjobs:
                            jobq.put(job)

                        new_multilocus_dataset = pmj.get_results()
                        del pmj

                    self.pasta_team.alignmentjobs = []
                    self.pasta_team.subsets = {}
                else:
                    new_multilocus_dataset = aligner.get_results()

                _LOG.debug("Alignment obtained. Preparing for tree.")
                self.pasta_aligner_job = None
                del aligner

                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'start_treeinference_timestamp.txt'))
                # Tree inference
                if self.start_tree_search_from_current:
                    start_from = self.tree
                else:
                    start_from = None
                self.status(
                    'Step %d. Alignment obtained. Tree inference beginning...'
                    % (self.current_iteration))
                if self.killed:
                    raise RuntimeError("PASTA Job killed")

                tbj = self.pasta_team.tree_estimator.create_job(
                    new_multilocus_dataset,
                    starting_tree=start_from,
                    num_cpus=self.num_cpus,
                    context_str=context_str + " tree",
                    tmp_dir_par=curr_tmp_dir_par,
                    delete_temps=delete_iteration_temps,
                    pasta_products=pasta_products,
                    step_num=self.current_iteration,
                    mask_gappy_sites=self.mask_gappy_sites)
                prev_curr_align = self.curr_iter_align_tmp_filename
                prev_curr_tree = self.curr_iter_tree_tmp_filename
                self.curr_iter_align_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                    self.current_iteration,
                    TEMP_SEQ_ALIGNMENT_TAG,
                    allow_existing=True)
                self.curr_iter_tree_tmp_filename = pasta_products.get_abs_path_for_iter_output(
                    self.current_iteration, TEMP_TREE_TAG, allow_existing=True)

                self.tree_build_job = tbj
                jobq.put(tbj)
                new_score, new_tree_str = tbj.get_results()
                self.tree_build_job = None
                del tbj
                if self.killed:
                    raise RuntimeError("PASTA Job killed")

                record_timestamp(
                    os.path.join(curr_tmp_dir_par,
                                 'end_treeinference_timestamp.txt'))
                curr_timestamp = time.time()
                accept_iteration = False

                if self.score is None:
                    self.score = new_score

                if self.best_score is None or new_score > self.best_score:
                    self.store_optimum_results(new_multilocus_dataset,
                                               new_tree_str, new_score,
                                               curr_timestamp)
                    this_iter_score_improved = True
                    accept_iteration = True

                if self._get_accept_mode(
                        new_score=new_score,
                        break_strategy_index=break_strategy_index
                ) == AcceptMode.BLIND_MODE:
                    if self.blind_mode_is_final:
                        self.is_stuck_in_blind = True
                        if self.switch_to_blind_timestamp is None:
                            if self._blindmode_trigger:
                                _LOG.debug("Blind runmode trigger = %s" %
                                           self._blindmode_trigger)
                            self.switch_to_blind_iter = self.current_iteration
                            self.switch_to_blind_timestamp = curr_timestamp
                    accept_iteration = True

                if accept_iteration:
                    self.score = new_score
                    self.multilocus_dataset = new_multilocus_dataset
                    self.tree_str = new_tree_str
                    if this_iter_score_improved:
                        self.status('realignment accepted and score improved.')
                    else:
                        self.status(
                            'realignment accepted despite the score not improving.'
                        )
                    # we do not want to continue to try different breaking strategies for this iteration so we break
                    self.status('current score: %s, best score: %s' %
                                (self.score, self.best_score))
                    break
                else:
                    self.status('realignment NOT accepted.')
                    self.curr_iter_align_tmp_filename = prev_curr_align
                    self.curr_iter_tree_tmp_filename = prev_curr_tree

                break_strategy_index += 1

                # self.status('current score: %s, best score: %s' % (self.score, self.best_score) )

            if not this_iter_score_improved:
                self.num_iter_since_imp += 1
            self.current_iteration += 1

        if self._termination_trigger:
            _LOG.debug("Termination trigger = %s" % self._termination_trigger)
        record_timestamp(
            os.path.join(tmp_dir_par, 'end_pastaiter_timestamp.txt'))

        ### TODO: if configuration is 'return_final_iter_TreeAndAlignpair', then skip the following three lines
        if not self.return_final_tree_and_alignment:
            self.multilocus_dataset = self.best_multilocus_dataset.new_with_shared_meta()
            for locus_alignment in self.best_multilocus_dataset:
                self.multilocus_dataset.append(copy.copy(locus_alignment))
            self.tree_str = self.best_tree_str
            self.score = self.best_score
        else:
            assert self.multilocus_dataset is not None
            assert self.tree_str is not None
            assert self.score is not None
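
The accept/reject decision inside the iteration loop can be condensed to the following sketch (simplified from the code above; it ignores the bookkeeping around blind-mode timestamps):

def accept_iteration(new_score, best_score, accept_mode_is_blind):
    # An iteration is kept when the tree score beats the best seen so far,
    # or unconditionally once the run is in blind mode.
    score_improved = best_score is None or new_score > best_score
    return score_improved or accept_mode_is_blind, score_improved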
Example #7
    from pasta.mainpasta import pasta_main
    from pasta import MESSENGER
    sys.setrecursionlimit(100000)
    _PASTA_DEBUG = os.environ.get('PASTA_DEBUG')
    _DEVELOPER = _PASTA_DEBUG and _PASTA_DEBUG != '0'

    if not _DEVELOPER:
        _PASTA_DEVELOPER = os.environ.get('PASTA_DEVELOPER')
        _DEVELOPER = _PASTA_DEVELOPER and _PASTA_DEVELOPER != '0'
    try:
        rc, temp_dir, temp_fs = pasta_main()
        if not rc:
            raise ValueError("Unknown PASTA execution error")
        if (temp_dir is not None) and (os.path.exists(temp_dir)):
            MESSENGER.send_info(
                "Note that temporary files from the run have not been deleted, they can be found in:\n   '%s'\n"
                % temp_dir)
            if sys.platform.lower().startswith('darwin') and ("'"
                                                              not in temp_dir):
                MESSENGER.send_info('''
If you cannot see this directory in the Finder application, you may want to use
the 'open' command executed from a Terminal.  You can do this by launching the
/Applications/Utilities/Terminal program and then typing

open '%s'

followed by a return at the prompt. If the argument to the open command is a
directory, then it should open a Finder window in the directory (even if that
directory is hidden by default).
''' % temp_dir)
    except Exception as x:
Example #8
    from pasta import MESSENGER

    sys.setrecursionlimit(100000)
    _PASTA_DEBUG = os.environ.get("PASTA_DEBUG")
    _DEVELOPER = _PASTA_DEBUG and _PASTA_DEBUG != "0"

    if not _DEVELOPER:
        _PASTA_DEVELOPER = os.environ.get("PASTA_DEVELOPER")
        _DEVELOPER = _PASTA_DEVELOPER and _PASTA_DEVELOPER != "0"
    try:
        rc, temp_dir, temp_fs = pasta_main()
        if not rc:
            raise ValueError("Unknown PASTA execution error")
        if (temp_dir is not None) and (os.path.exists(temp_dir)):
            MESSENGER.send_info(
                "Note that temporary files from the run have not been deleted, they can be found in:\n   '%s'\n"
                % temp_dir
            )
            if sys.platform.lower().startswith("darwin") and ("'" not in temp_dir):
                MESSENGER.send_info(
                    """
If you cannot see this directory in the Finder application, you may want to use
the 'open' command executed from a Terminal.  You can do this by launching the
/Applications/Utilities/Terminal program and then typing

open '%s'

followed by a return at the prompt. If the argument to the open command is a
directory, then it should open a Finder window in the directory (even if that
directory is hidden by default).
"""
                    % temp_dir
Example #9
    def runwithpipes(self):
        k = self._k
        # Use a working temporary directory in the cluster nodes
        with TemporaryDirectory() as tempdir:
            k['cwd'] = tempdir
            k['stderr'] = PIPE
            k['stdout'] = PIPE

            for key, v in self.environ.items():
                os.environ[key] = v

            _LOG.debug('Launching %s.' % " ".join(self._invocation))
            _LOG.debug('Options %s.', k)

            command = ""
            for item in self._invocation:
               command = command + " "+item
            MESSENGER.send_info("[JMAbuin] Initial mafft command "+command)

            # process = Popen(command, stdin=PIPE, shell=True, **k)

            err_msg = []
            err_msg.append("PASTA failed because one of the programs it tried to run failed.")
            err_msg.append('The invocation that failed was: \n    "%s"\n' % '" "'.join(self._invocation))

            output = ""

            try:
                if os.path.isfile(self._invocation[0]):
                    MESSENGER.send_info("[JMAbuin] " + self._invocation[0] + " exists!")
                else:
                    MESSENGER.send_warning("[JMAbuin] " + self._invocation[0] + " does not exist! Searching for it.")

                    if (os.path.isfile(os.getcwd() + "/pasta.zip/bin/mafft")):
                        MESSENGER.send_info("[JMAbuin] Found mafft!! => " + os.getcwd() + "/pasta.zip/bin/mafft")
                        self._invocation[0] = os.getcwd() + "/pasta.zip/bin/mafft"

                    else:

                        newMafftPath = self.findMafft("../")
                        if os.path.isfile(newMafftPath):
                            MESSENGER.send_info("[JMAbuin] newly found mafft path is " + newMafftPath)
                            self._invocation[0] = newMafftPath
                        else:
                            MESSENGER.send_error("[JMAbuin] Could not find mafft!!")

                    MESSENGER.send_info("[JMAbuin] Final mafft" + self._invocation[0])

                startTime = time.time()
                process = Popen(self._invocation, stdin=PIPE, **k)

                (output, output_err) = process.communicate()

                endTime = time.time()

                self.return_code = process.returncode

                MESSENGER.send_info("[JMAbuin] :: runwithpipes :: return code from " + self._invocation[0] + " is: " + str(self.return_code) + " and execution time is: " + str(endTime - startTime) + " seconds.")

                process.stdin.close()
                process.stdout.close()
                process.stderr.close()

                if self.return_code:
                    # errorFromFile = self.read_stderr(_stderr_fo)
                    if output_err:
                        # communicate() returns bytes when the pipes are not in text mode
                        err_msg.append(output_err.decode("utf-8", "replace") if isinstance(output_err, bytes) else output_err)
                    self.error = "\n".join(err_msg)
                    raise Exception("")
                _LOG.debug(
                    'Finished %s.\n Return code: %s; %s' % (" ".join(self._invocation), self.return_code, self.error))

            except OSError as ose:
                err_msg.append(str(ose))
                MESSENGER.send_error("[JMAbuin] " + ose.message)
                MESSENGER.send_error("[JMAbuin] " + ose.child_traceback)
                sys.exit(ose.message+" :: "+ose.child_traceback)

            except Exception as e:
                err_msg.append(str(e))
                MESSENGER.send_error("[JMAbuin] " + str(e))
                self.error = "\n".join(err_msg)
                _LOG.error(self.error)
                sys.exit(str(e))
            return output
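
The executable lookup inside runwithpipes follows a fixed fallback order; below is a compact sketch of that order, with find_in_tree standing in for self.findMafft (an assumption, since that helper is not shown here):

import os

def resolve_executable(candidate, find_in_tree):
    # 1) the configured path, 2) a copy bundled in pasta.zip next to the
    # working directory, 3) a recursive search upward from the parent dir.
    if os.path.isfile(candidate):
        return candidate
    bundled = os.getcwd() + "/pasta.zip/bin/" + os.path.basename(candidate)
    if os.path.isfile(bundled):
        return bundled
    found = find_in_tree("../")
    return found if os.path.isfile(found) else None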
Example #10
    def run(self):
        _LOG.debug('launching %s.' % " ".join(self._invocation))
        k = self._k
        proc_cwd = k.get('cwd', os.curdir)
        stdout_file_path = k.get('stdout', None)
        stderr_file_path = k.get('stderr', None)
        if stdout_file_path:
            _stdout_fo = open_with_intermediates(stdout_file_path, 'w')
        else:
            _stdout_fo = open_with_intermediates(os.path.join(proc_cwd, '.Job.stdout.txt'), 'w')
        k['stdout'] = _stdout_fo
        if stderr_file_path:
            _stderr_fo = open_with_intermediates(stderr_file_path, 'w')
        else:
            _stderr_fo = open_with_intermediates(os.path.join(proc_cwd, '.Job.stderr.txt'), 'w')
        k['stderr'] = _stderr_fo

        for key, v in self.environ.items():
            os.environ[key] = v

        err_msg = []
        err_msg.append("PASTA failed because one of the programs it tried to run failed.")
        err_msg.append('The invocation that failed was: \n    "%s"\n' % '" "'.join(self._invocation))

        self.return_code = 0  # Initialization of return code

        try:

            command = ""
            for item in self._invocation:
                command = command + " "+item

            # MESSENGER.send_info("[JMAbuin] Initial command "+command)

            if configure_spark.isSpark():

                fileIndex = 0

                if(self._invocation[fileIndex] == "java"): # Case of opal
                    # MESSENGER.send_info("[JMAbuin] We are launching OPAL")
                    fileIndex = 3

                    # Check if Java is in our path
                    # MESSENGER.send_info("[JMAbuin] JAVA_HOME is " + os.getenv('JAVA_HOME'))
                    # MESSENGER.send_info("[JMAbuin] PATH is " + os.getenv('PATH'))

                    # Use the JVM from JAVA_HOME when it is set; os.getenv may
                    # return None, so check before building the path.
                    java_home = os.getenv('JAVA_HOME')
                    if java_home is not None:
                        self._invocation[0] = java_home + "/bin/java"

                    '''
                    current_dir = os.getcwd()
                    current_theoretical_dir = ""
                    currentItem = 0
                    totalItems = len(self._invocation[5].split("/"))

                    for item in self._invocation[5].split("/"):
                        if(currentItem > 0) and (totalItems - currentItem > 7):
                            current_theoretical_dir = current_theoretical_dir + "/" + item
                        currentItem += 1

                    if(current_dir != current_theoretical_dir):
                        self._invocation[5] = self._invocation[5].replace(current_theoretical_dir, current_dir)
                        self._invocation[7] = self._invocation[5].replace(current_theoretical_dir, current_dir)
                        self._invocation[9] = self._invocation[5].replace(current_theoretical_dir, current_dir)
                    '''
                    # Example: tempAHc28U/step0/centroid/pw/r5d2_r5d1/tempopal_waXAP/out.fasta
                    # First file:
                    self.changePermissions(self._invocation[5], 7)
                    self.changePermissions(self._invocation[7], 7)
                    self.changePermissions(self._invocation[9], 7)

                execname = os.path.basename(self._invocation[fileIndex])

                if not os.path.isfile(self._invocation[fileIndex]):
                    MESSENGER.send_warning("[JMAbuin] " + self._invocation[fileIndex] + " does not exist! Searching for it.")

                    if os.path.isfile(os.getcwd() + "/pasta.zip/bin/" + execname):
                        MESSENGER.send_info("[JMAbuin] Found " + execname + "!! => " + os.getcwd() + "/pasta.zip/bin/" + execname)
                        self._invocation[fileIndex] = os.getcwd() + "/pasta.zip/bin/" + execname

                    else:

                        newInvocationPath = self.findFile("../", execname)

                        if (os.path.isfile(newInvocationPath)):
                            MESSENGER.send_info("[JMAbuin] new " + execname + " path is " + newInvocationPath)
                            self._invocation[fileIndex] = newInvocationPath
                        else:
                            MESSENGER.send_error("[JMAbuin] Could not find " + execname + "!!")

                # MESSENGER.send_info("[JMAbuin] Final " + execname + " => " + self._invocation[fileIndex])

                command = ""
                for item in self._invocation:
                    command = command + " " + item

                #number_of_cpus = os.environ["OMP_NUM_THREADS"]

                #MESSENGER.send_info("[JMAbuin] Final command " + command + "with " + number_of_cpus + " CPUS")

            startTime = time.time()

            process = Popen(self._invocation, stdin=PIPE, **k)
            self.return_code = process.wait()  # Chema: here

            endTime = time.time()

            # process = Popen(self._invocation, stdin=PIPE, **k)
            # (output, output_err) = process.communicate()
            # self.return_code = process.returncode

            MESSENGER.send_info("[JMAbuin] :: run :: return code from " + self._invocation[0] + " is: " + str(self.return_code) + " and execution time is: " + str(endTime - startTime) + " seconds.")

            if "fasttreeMP" in self._invocation[0]:
                MESSENGER.send_info("[JMAbuin] running fastree in parallel")

                command = ""
                for item in self._invocation:
                    command = command + " " + item

                number_of_cpus = os.environ["OMP_NUM_THREADS"]

                MESSENGER.send_info("[JMAbuin] Final command " + command + " with " + number_of_cpus+" CPUS")

            _stdout_fo.close()
            _stderr_fo.close()
            process.stdin.close()

            if self.return_code < 0:
                errorFromFile = self.read_stderr(_stderr_fo)
                if errorFromFile:
                    err_msg.append(errorFromFile)
                    # err_msg.append(output_err)
                self.error = "\n".join(err_msg)
                raise Exception("")
            _LOG.debug('Finished %s.\n Return code: %s; %s' % (" ".join(self._invocation), self.return_code, self.error))

        except OSError as ose:
            err_msg.append(str(ose))
            self.error = "\n".join(err_msg)

            MESSENGER.send_error("[JMAbuin] " + ose.message)
            MESSENGER.send_error("[JMAbuin] " + ose.child_traceback)
            _LOG.error(self.error)
            sys.exit(ose.message + " :: "+ose.child_traceback)

        except Exception as e:
            err_msg.append(str(e))
            self.error = "\n".join(err_msg)
            MESSENGER.send_error("[JMAbuin] " + self.error)
            _LOG.error(self.error)
            sys.exit(str(e))