Exemplo n.º 1
0
    def run(self, pamlsrc, output_folder, model='M1'):
        """Run a codeml model on the temporary newick tree through ETE.

        The tree is read from ``temptree.nw`` and linked to
        ``self.alignmentfile``; the run takes place in ``self.workdir``.
        The name handed to ETE is ``<model>.<output_folder>``.

        `M1` is the default because the original authors found it the best
        fit for their orthology inference; `M0`, `M2` and `M3` can be
        requested as well.

        :param pamlsrc: Path to the codeml binary (located in paml's `/bin`).
        :param output_folder: The name of the output folder; it is appended
            to the model name, separated by a dot.
        :param model: The model to be used. (Default value = 'M1')
        """

        # Newick tree, with the alignment attached to it
        evol_tree = EvolTree('temptree.nw')
        evol_tree.link_to_alignment(self.alignmentfile)

        # Where the run happens and which codeml binary performs it
        evol_tree.workdir = self.workdir
        evol_tree.execpath = pamlsrc

        # Model name (M0, M1, M2 or M3) joined to the output folder name
        run_name = '.'.join((model, output_folder))
        evol_tree.run_model(run_name)

        message = 'Codeml is generating data in %s.' % run_name
        self.ete3paml_log.info(message)
Exemplo n.º 2
0
    def main(self):
        """Run the default codeml model on the configured tree and alignment.

        Reads ``self.tree``, ``self.alignment``, ``self.workdir``,
        ``self.pamlpath`` and ``self.defaultmodel`` from the instance.
        """

        model_name = self.defaultmodel
        print("Running model %s paml on input." % str(model_name))

        # Newick tree with its alignment attached
        evol_tree = EvolTree(self.tree)
        evol_tree.link_to_alignment(self.alignment)

        # Working directory and path of the codeml binary
        evol_tree.workdir = self.workdir
        evol_tree.execpath = self.pamlpath

        # Launch codeml with the chosen model
        evol_tree.run_model(model_name)
Exemplo n.º 3
0
def ete3paml(gene, paml_path, workdir='data/paml-output/', model='M1'):
    """Run a codeml model for one gene using ETE3's integration with PAML.

    The species tree in ``data/initial-data/species_tree.nw`` is pruned down
    to the organisms (from ``data/initial-data/organisms.csv``) whose names
    occur in the gene's aligned CDS fasta file. The pruned tree is then
    linked to that alignment and the model is run as ``<model>.<gene>``.

    :param gene: Gene name; used to build the alignment file path and as the
        suffix of the model name passed to ``run_model``.
    :param paml_path: Path to the codeml binary.
    :param workdir: Working directory assigned to the tree.
        (Default value = 'data/paml-output/')
    :param model: The codeml model to run. (Default value = 'M1')
    """

    # The alignment is read as text below and linked to the tree afterwards,
    # so build its path once.
    alignment_path = ('data/clustal-output/' + gene + '_Aligned/' + gene +
                      '_aligned_cds_nucl.fasta')

    # Import the species tree to compare species that are present in the
    # alignment file
    t = Tree('data/initial-data/species_tree.nw', format=1)
    orgsfile = pd.read_csv('data/initial-data/organisms.csv', header=None)

    # The first column of the csv holds the organism names
    organismslist = formatlist(list(orgsfile[0]))

    # Import alignment file as string; the context manager closes the file
    # even if reading fails
    with open(alignment_path, 'r') as alignment_file:
        alignment_str = alignment_file.read()

    # Keep the branches in the species tree for species in the alignment file.
    # Some species may not be present in the alignment file.
    branches2keep = [organism for organism in organismslist
                     if organism in alignment_str]

    # prune() modifies the tree in place and returns None, so its result must
    # not be used as the pruned tree.
    t.prune(branches2keep, preserve_branch_length=True)

    # Hand the pruned topology to EvolTree as a newick string
    tree = EvolTree(t.write())

    # Import the alignment
    tree.link_to_alignment(alignment_path)

    tree.workdir = workdir

    # Set the binpath of the codeml binary
    tree.execpath = paml_path

    # Run the codeml model
    tree.run_model(model + '.' + gene)