示例#1
0
    def make_alignment(self, source_alignment_path):
        """Load the source alignment and reconcile it with the stored copy.

        The alignment is read from ``source_alignment_path`` into
        ``self.alignment``.  A copy is kept as ``source.phy`` inside
        ``the_config.start_tree_path``; that location is recorded in
        ``self.alignment_path``.  If no copy exists yet, the freshly loaded
        alignment is written there.  Otherwise the copy is read back and
        checked against the freshly loaded alignment.

        Raises:
            AnalysisError: if the stored copy is not ``same_as`` the new
                alignment, or if the two alignments do not carry the same
                species names (compared as multisets, so order is ignored).
        """
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # TODO REMOVE -- this should be part of the checking procedure
        self.alignment_path = os.path.join(the_config.start_tree_path, 'source.phy')

        if not os.path.exists(self.alignment_path):
            # First run: keep a copy of the alignment for later comparison
            self.alignment.write(self.alignment_path)
            return

        # A copy from a previous run exists, so make sure it is the same
        old_align = Alignment()
        old_align.read(self.alignment_path)
        if not old_align.same_as(self.alignment):
            # Single-line message: a triple-quoted literal would leak the
            # source indentation and a newline into the log output
            log.error(
                "Alignment file has changed since previous run. "
                "You need to use the force-restart option.")
            raise AnalysisError

        # Counter gives an order-insensitive comparison that still notices
        # duplicated names
        old_species = collections.Counter(old_align.species)
        new_species = collections.Counter(self.alignment.species)
        if old_species != new_species:
            log.error(
                "Species names in alignment have changed since previous run. "
                "You need to use the force-restart option.")
            raise AnalysisError
    def make_alignment(self, cfg, alignment):
        """Create this subset's alignment file, or validate the stored one.

        A ``SubsetAlignment`` is built from ``alignment`` and this subset.
        Its on-disk location is ``<cfg.phyml_path>/<self.name>.phy``, which
        is also recorded in ``self.alignment_path``.  When nothing exists at
        that location the new alignment is written there; when a file does
        exist it is read back and compared with the new one via ``same_as``.

        Raises:
            SubsetError: if the stored file is not ``same_as`` the newly
                built subset alignment.
        """
        fresh = SubsetAlignment(alignment, self)
        target = os.path.join(cfg.phyml_path, self.name + '.phy')
        # Record the location on the subset so it stays available
        self.alignment_path = target

        if not os.path.exists(target):
            # Nothing stored yet, so this call creates the file
            fresh.write(target)
            return

        log.debug("Found existing alignment file %s", target)
        stored = Alignment()
        stored.read(target)
        if stored.same_as(fresh):
            return

        # The stored file and the new subset alignment disagree
        log.error(
            "It looks like you have changed one or more of the "
            "data_blocks in the configuration file, "
            "so the new subset alignments "
            "don't match the ones stored for this analysis. "
            "You'll need to run the program with --force-restart")
        raise SubsetError
示例#3
0
    def make_alignment(self, source_alignment_path):
        """Read the source alignment and check it against the saved copy.

        ``self.alignment`` is set to an ``Alignment`` read from
        ``source_alignment_path``.  ``self.alignment_path`` is set to
        ``source.phy`` inside ``the_config.start_tree_path``.  When that file
        is missing the alignment is written to it; when it is present it is
        read back and validated against the freshly loaded alignment.

        Raises:
            AnalysisError: if the saved copy is not ``same_as`` the new
                alignment, or if the species names of the two alignments
                differ when compared as multisets (order-insensitive).
        """
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # TODO REMOVE -- this should be part of the checking procedure
        self.alignment_path = os.path.join(the_config.start_tree_path,
                                           'source.phy')

        if not os.path.exists(self.alignment_path):
            # No saved copy yet: store one so later runs can compare
            self.alignment.write(self.alignment_path)
            return

        # Make sure the saved copy is the same as what was just read
        old_align = Alignment()
        old_align.read(self.alignment_path)
        if not old_align.same_as(self.alignment):
            # Adjacent literals keep the emitted message on one line; the
            # triple-quoted form put a newline and indentation into the log
            log.error(
                "Alignment file has changed since previous run. "
                "You need to use the force-restart option.")
            raise AnalysisError

        # Compare species as multisets so ordering does not matter
        species_before = collections.Counter(old_align.species)
        species_now = collections.Counter(self.alignment.species)
        if species_before != species_now:
            log.error(
                "Species names in alignment have changed since previous run. "
                "You need to use the force-restart option.")
            raise AnalysisError
示例#4
0
    def make_alignment(self, cfg, alignment):
        """Write this subset's alignment file, or verify an existing one.

        Builds a ``SubsetAlignment`` from ``alignment`` and this subset, and
        stores the path ``<cfg.phylofiles_path>/<self.name>.phy`` in
        ``self.alignment_path``.  If no file exists at that path the subset
        alignment is written to it.  If a file exists, it is read and
        compared with the new subset alignment using ``same_as``.

        Raises:
            SubsetError: if the existing file is not ``same_as`` the newly
                built subset alignment.
        """
        subset_view = SubsetAlignment(alignment, self)
        phy_file = os.path.join(cfg.phylofiles_path, self.name + '.phy')
        # Keep the path on the subset for later use
        self.alignment_path = phy_file

        already_stored = os.path.exists(phy_file)
        if not already_stored:
            # First time round: the file has to be written
            subset_view.write(phy_file)
            return

        log.debug("Found existing alignment file %s", phy_file)
        previous = Alignment()
        previous.read(phy_file)
        matches = previous.same_as(subset_view)
        if not matches:
            # The file on disk had better be the same; it is not
            log.error(
                "It looks like you have changed one or more of the "
                "data_blocks in the configuration file, "
                "so the new subset alignments "
                "don't match the ones stored for this analysis. "
                "You'll need to run the program with --force-restart")
            raise SubsetError
示例#5
0
    def make_alignment(self, source_alignment_path):
        """Load the source alignment and keep or verify a stored copy.

        The alignment read from ``source_alignment_path`` becomes
        ``self.alignment``.  The copy lives at ``source.phy`` inside
        ``self.cfg.start_tree_path`` and its path is stored in
        ``self.alignment_path``.  A missing copy is written; an existing
        copy is read and compared with the new alignment via ``same_as``.

        Raises:
            AnalysisError: if the stored copy is not ``same_as`` the newly
                read alignment.
        """
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        # Location of the copy kept alongside the start tree files
        self.alignment_path = os.path.join(self.cfg.start_tree_path, 'source.phy')

        if not os.path.exists(self.alignment_path):
            self.alignment.write(self.alignment_path)
            return

        # A copy exists already, so check it has not changed
        stored_copy = Alignment()
        stored_copy.read(self.alignment_path)
        if stored_copy.same_as(self.alignment):
            return

        log.error("Alignment file has changed since previous run. You need to use the force-restart option.")
        raise AnalysisError
    def make_alignment(self, cfg, alignment):
        """Create or validate the alignment file belonging to this subset.

        A ``SubsetAlignment`` is made from ``alignment`` and this subset.
        The file path ``<cfg.phylofiles_path>/<self.name>.phy`` is stored in
        ``self.alignment_path``.  If the file is absent the subset alignment
        is written to it; otherwise the file is read and compared with the
        new subset alignment through ``same_as``.

        Raises:
            SubsetError: if the file on disk is not ``same_as`` the new
                subset alignment (``self.FORCE_RESTART_MESSAGE`` is logged
                first).
        """
        new_alignment = SubsetAlignment(alignment, self)
        location = os.path.join(cfg.phylofiles_path, self.name + '.phy')
        # Store the path on the subset so it is kept around
        self.alignment_path = location

        if not os.path.exists(location):
            # No file yet, so write one
            new_alignment.write(location)
            return

        log.debug("Found existing alignment file %s", location)
        on_disk = Alignment()
        on_disk.read(location)

        # The stored alignment is required to match the new one
        if on_disk.same_as(new_alignment):
            return
        log.error(self.FORCE_RESTART_MESSAGE)
        raise SubsetError
示例#7
0
    def make_alignment(self, source_alignment_path):
        """Read the source alignment and store or check its saved copy.

        Sets ``self.alignment`` to the alignment read from
        ``source_alignment_path`` and ``self.alignment_path`` to
        ``source.phy`` inside ``self.cfg.start_tree_path``.  When that file
        does not exist the alignment is written to it; when it does, it is
        read and compared against the new alignment with ``same_as``.

        Raises:
            AnalysisError: if the saved copy is not ``same_as`` the newly
                read alignment.
        """
        self.alignment = Alignment()
        self.alignment.read(source_alignment_path)

        copy_path = os.path.join(self.cfg.start_tree_path, 'source.phy')
        self.alignment_path = copy_path

        copy_exists = os.path.exists(self.alignment_path)
        if copy_exists:
            # Verify the saved copy still agrees with the source
            saved = Alignment()
            saved.read(self.alignment_path)
            unchanged = saved.same_as(self.alignment)
            if not unchanged:
                log.error(
                    "Alignment file has changed since previous run. You need to use the force-restart option."
                )
                raise AnalysisError
            return

        # No copy so far: start by saving one
        self.alignment.write(self.alignment_path)
示例#8
0
    def make_alignment(self, cfg, alignment):
        """Create this subset's alignment file, or validate the stored one.

        A ``SubsetAlignment`` is built from ``alignment`` and this subset.
        The file path ``<cfg.phylofiles_path>/<self.subset_id>.phy`` is
        recorded in ``self.alignment_path``.  If no file exists there the
        subset alignment is written to it; otherwise the file is read and
        compared with the new subset alignment via ``same_as``.

        Raises:
            SubsetError: if the stored file is not ``same_as`` the new
                subset alignment (``self.FORCE_RESTART_MESSAGE`` is logged
                first).
        """
        # Make an Alignment from the source, using this subset
        sub_alignment = SubsetAlignment(alignment, self)

        sub_path = os.path.join(cfg.phylofiles_path, self.subset_id + '.phy')
        # Add it into the sub, so we keep it around
        self.alignment_path = sub_path

        if not os.path.exists(sub_path):
            # We need to write it
            sub_alignment.write(sub_path)
            return

        # Pass the path as a logging argument so the message is only
        # formatted when debug output is actually enabled
        log.debug("Found existing alignment file %s", sub_path)
        old_align = Alignment()
        old_align.read(sub_path)

        # It had better be the same!
        if not old_align.same_as(sub_alignment):
            log.error(self.FORCE_RESTART_MESSAGE)
            raise SubsetError
示例#9
0
    def analyse_subset(self, sub, models):
        """Analyse the subset using the models given.

        This is the core place where everything comes together:

        1. Progress counters on ``self`` are updated and, the first time a
           subset name is seen, a progress line is logged.
        2. If ``sub.results`` is empty, cached results are loaded with
           ``sub.read_cache``.  Models already present in ``sub.results``
           are not analysed again.
        3. The subset alignment file is written to
           ``<self.cfg.phyml_path>/<sub.name>.phy``, or validated against
           the file already stored there.
        4. ``self.parse_results`` is given the chance to pick up earlier
           output, the remaining models are run through ``phyml.analyse``,
           and the results are parsed again.
        5. Model selection is run on the subset, the subset summary is
           written and the subset cache is saved.

        The method returns early (after model selection, without writing
        the summary or the cache) when there is nothing left to run.

        NOTE(review): the code re-tests ``models_to_do`` for emptiness after
        each ``parse_results`` call, so ``parse_results`` presumably removes
        the models it finds from that set in place -- confirm against its
        implementation.

        Args:
            sub: the subset to analyse.
            models: iterable of model names to run on the subset.

        Raises:
            AnalysisError: if the stored subset alignment is not ``same_as``
                the newly built one, or if some models are still
                outstanding after they have been run and parsed.
        """
        log.debug("About to analyse %s using models %s", sub, ", ".join(models))

        # Keep people informed about what's going on.  If we don't know the
        # total subset number, we can usually get it like this
        if self.total_subset_num is None:
            self.total_subset_num = len(sub._cache)
        old_num_analysed = self.subsets_analysed
        self.subsets_analysed_set.add(sub.name)
        self.subsets_analysed = len(self.subsets_analysed_set)
        if self.subsets_analysed > old_num_analysed:
            # We've just started on a subset we haven't seen yet
            percent_done = (float(self.subsets_analysed) * 100.0 /
                            float(self.total_subset_num))
            log.info("Analysing subset %d/%d: %.2f%% done",
                     self.subsets_analysed, self.total_subset_num,
                     percent_done)

        subset_cache_path = os.path.join(self.cfg.subsets_path, sub.name + '.bin')
        # We might have already saved a bunch of results, try there first
        if not sub.results:
            log.debug("Reading in cached data from the subsets file")
            sub.read_cache(subset_cache_path)

        # First, see if we've already got the results loaded. Then we can
        # shortcut all the other checks
        models_done = set(sub.results.keys())
        log.debug("These models have already been done: %s", models_done)
        models_required = set(models)
        models_to_do = models_required - models_done
        log.debug("Which leaves these models still to analyse: %s", models_to_do)

        # Empty set means we're done
        if not models_to_do:
            log.debug("All models already done, so using just the cached results for subset %s", sub)
            # Model selection is re-run unconditionally here (an earlier
            # version only did so when the set of models had changed)
            sub.model_selection(self.cfg.model_selection, self.cfg.models)
            return

        # Make an Alignment from the source, using this subset
        sub_alignment = SubsetAlignment(self.alignment, sub)
        sub_path = os.path.join(self.cfg.phyml_path, sub.name + '.phy')
        # Add it into the sub, so we keep it around
        sub.alignment_path = sub_path

        # Maybe it is there already?
        if os.path.exists(sub_path):
            log.debug("Found existing alignment file %s", sub_path)
            old_align = Alignment()
            old_align.read(sub_path)

            # It had better be the same!
            if not old_align.same_as(sub_alignment):
                # Each fragment ends with a space so the adjacent literals
                # join into readable sentences
                log.error(
                    "It looks like you have changed one or more of the "
                    "data_blocks in the configuration file, "
                    "so the new subset alignments "
                    "don't match the ones stored for this analysis. "
                    "You'll need to run the program with --force-restart")
                raise AnalysisError
        else:
            # We need to write it
            sub_alignment.write(sub_path)

        # Try and read in some previous analyses
        log.debug("Checking for old results in the phyml folder")
        self.parse_results(sub, models_to_do)
        if not models_to_do:
            # As above, model selection is always re-run
            sub.model_selection(self.cfg.model_selection, self.cfg.models)
            return

        # What is left, we actually have to analyse.  For efficiency, we
        # rank the models by their difficulty - most difficult first.  The
        # model itself is the secondary sort key, so equal difficulties are
        # ordered deterministically.  sorted() is used because zip() has no
        # .sort() method on Python 3.
        sorted_models_to_do = tuple(sorted(
            models_to_do,
            key=lambda m: (get_model_difficulty(m), m),
            reverse=True))

        log.debug("About to analyse these models, in this order: %s", sorted_models_to_do)
        tasks = [
            (phyml.analyse,
             (m, sub_path, self.tree_path, self.cfg.branchlengths))
            for m in sorted_models_to_do
        ]

        if self.threads == 1:
            self.run_models_concurrent(tasks)
        else:
            self.run_models_threaded(tasks)

        # Now parse the models we've just done
        self.parse_results(sub, models_to_do)

        # This should be empty NOW!
        if models_to_do:
            log.error("Failed to run models %s; not sure why",
                      ", ".join(models_to_do))
            raise AnalysisError

        # Now we have analysed all models for this subset, we do model selection
        # but ONLY on the models specified in the cfg file.
        sub.model_selection(self.cfg.model_selection, self.cfg.models)

        # If we made it to here, we should write out the new summary
        self.rpt.write_subset_summary(sub)
        # We also need to update this
        sub.write_cache(subset_cache_path)