示例#1
0
def msa(exe, seq, outfile='', trimming=False, verbose=False):
    """
    Align a FASTA sequence file with an external MSA program.

    :param exe: str, path to the executable of a MSA program.
    :param seq: str, path to the multiple sequence file (FASTA format).
    :param outfile: str, path to the aligned sequence output (FASTA) file,
        default: [basename].[aligner].fasta, where basename is the sequence
        filename as returned by ``basename`` and aligner is the aligner name
        reported by ``_guess``.
    :param trimming: bool, when True the alignment is additionally passed
        through ``trim`` (unless a trimmed file already exists) and the
        trimmed file is returned instead.
    :param verbose: bool, log at INFO level if True, otherwise at ERROR
        level (default).
    :return: str, path to the alignment (or trimmed alignment) file.

    The process exits with status 1 when ``seq`` is not a file, ``exe`` is
    empty, or ``exe`` is not recognised by ``_guess``.
    """

    logger.setLevel(logging.INFO if verbose else logging.ERROR)

    if not os.path.isfile(seq):
        error('Sequence: {} is not a file or does not exist.'.format(seq))
        sys.exit(1)

    sequence = os.path.abspath(seq)

    if not exe:
        error('Invalid aligner executable (exe), empty string, sequence '
              'alignment aborted.')
        sys.exit(1)

    aligner, func = _guess(exe)
    if func is None:
        error('Invalid or unsupported aligner executable (exe): {}, '
              'alignment aborted.'.format(exe))
        sys.exit(1)

    if not outfile:
        outfile = basename(sequence) + '.' + aligner + '.fasta'

    # An existing alignment file is reused rather than recomputed.
    if os.path.isfile(outfile):
        info('Found pre-existing alignment file.')
    else:
        message = 'Aligning sequence {} using {}.'
        info(message.format(sequence, aligner.upper()))
        outfile = func(exe, sequence, outfile)
        info('Successfully aligned sequence, alignment was saved to '
             '{}.'.format(outfile))

    if not trimming:
        return outfile

    clean = basename(outfile) + '.trimmed.fasta'
    if os.path.isfile(clean):
        info('Found pre-existing trimmed alignment.')
        return clean

    _, trimmed = trim(outfile, outfile=clean, verbose=verbose)
    return trimmed
示例#2
0
def _sequencing(sequence, tree, aligner, ancestor, wd, asr_model, verbose):
    """
    Prepare the sequence data file and load it.

    Without ``tree`` the file is handed to ``_load`` unchanged. With
    ``tree`` the file must be FASTA: it is aligned when its records differ
    in length, trimmed when any record holds characters outside
    ``AMINO_ACIDS``, and then passed to the ancestral reconstruction
    program; the resulting file is what gets loaded. Existing intermediate
    files are reused.

    :param sequence: str, path to a sequence data file.
    :param tree: str, NEWICK tree (file or string); empty to load
        ``sequence`` directly.
    :param aligner: str, executable of an alignment program, required only
        when the FASTA records are not all of the same length.
    :param ancestor: str, executable of an ancestral reconstruction
        program, required whenever ``tree`` is given.
    :param wd: not used by this function.
    :param asr_model: model name or file forwarded to ``asr.asr``.
    :param verbose: bool, forwarded to ``msa.msa`` and ``asr.asr``.
    :return: tuple, the five values returned by ``_load`` (tree, rate,
        records, aps, size) followed by the path of the file loaded.
    """

    def _trimmed_of(alignment):
        # Reuse the trimmed counterpart of an alignment or create it.
        path = ''.join([utilities.basename(alignment), '.trimmed.fasta'])
        if os.path.isfile(path):
            info('Using pre-existed trimmed alignment file.')
            return path
        _, path = utilities.trim(alignment, outfile=path)
        return path

    if tree:
        # Result is discarded -- presumably called to validate the tree.
        utilities.Tree(tree, leave=True)
        AA, lengths, aa = set(AMINO_ACIDS), [], []

        with open(sequence) as handle:
            line = handle.readline().strip()
            if not line.startswith('>'):
                error('NEWICK format tree was provided, but the sequence file '
                      'was not in the FASTA format.')
                sys.exit(1)
            handle.seek(0)
            for record in SeqIO.parse(handle, 'fasta'):
                lengths.append(len(record.seq))
                aa.append(set(record.seq).issubset(AA))

        if len(set(lengths)) == 1:
            # Equal-length records are treated as an existing alignment.
            alignment = sequence
            trimmed = alignment if all(aa) else _trimmed_of(alignment)
        elif aligner:
            aler, _ = msa._guess(aligner)
            outfile = ''.join(
                [utilities.basename(sequence), '.{}.fasta'.format(aler)])
            if os.path.isfile(outfile):
                info('Using pre-existed alignment file')
                trimmed = _trimmed_of(outfile)
            else:
                trimmed = msa.msa(aligner,
                                  sequence,
                                  verbose=verbose,
                                  outfile=outfile,
                                  trimming=True)
        else:
            error('FASTA format sequence file was provided, but no '
                  'alignment program was provided.')
            sys.exit(1)

        if not trimmed:
            # Previously this path exited without telling the user why.
            error('No trimmed alignment is available for ancestral states '
                  'reconstruction, exited.')
            sys.exit(1)

        if not ancestor:
            error('No ancestral reconstruction program was provided.')
            sys.exit(1)

        if trimmed.endswith('.trimmed.fasta'):
            name = trimmed.replace('.trimmed.fasta', '')
        else:
            name = trimmed

        aser, _ = asr._guess(ancestor)
        outfile = '{}.{}.tsv'.format(utilities.basename(name), aser)
        if os.path.isfile(outfile):
            info('Using pre-existed ancestral states sequence file.')
            sequence = outfile
        else:
            sequence = asr.asr(ancestor,
                               trimmed,
                               tree,
                               asr_model,
                               verbose=verbose,
                               outfile=outfile)

    tree, rate, records, aps, size = _load(sequence)
    return tree, rate, records, aps, size, sequence
示例#3
0
 def test_tsv(self):
     """basename('a.name.tsv') is expected to give 'a.name'."""
     stem = basename('a.name.tsv')
     self.assertEqual('a.name', stem)
示例#4
0
 def test_fasta(self):
     """basename('a.name.fasta') is expected to give 'a.name'."""
     stem = basename('a.name.fasta')
     self.assertEqual('a.name', stem)
示例#5
0
 def test_text(self):
     """basename('a.name.text') is expected to give 'a.name'."""
     stem = basename('a.name.text')
     self.assertEqual('a.name', stem)
示例#6
0
def _iqtree(exe, msa, model, cat, gamma, alpha, freq, invp, start_tree,
            constraint_tree, seed, outfile):
    """
    Infer ML phylogenetic tree using IQ-TREE.

    The alignment is copied as ``msa.fa`` into a temporary directory created
    next to ``msa``; IQ-TREE runs inside that directory, the resulting
    ``tmf.treefile`` is copied to the output path and the directory is
    removed afterwards.

    :param exe: str, path to the IQ-TREE executable.
    :param msa: str, path to the alignment file.
    :param model: model object; its ``name``, ``type``, ``frequency``,
        ``invp``, ``gamma`` and ``rates`` attributes are read here.
    :param cat: number of rate categories (``+R``); None or the strings
        'None'/'none' fall back to ``model.rates``.
    :param gamma: number of gamma categories (``+G``), falls back to
        ``model.gamma``.
    :param alpha: gamma shape, passed with ``-a`` only when gamma is set.
    :param freq: frequency option; 'estimate' selects ``FO``, anything else
        ``F``. Falls back to ``model.frequency``.
    :param invp: proportion of invariable sites, passed with ``-i``.
    :param start_tree: accepted for signature parity with the other tree
        builders; not used by this function.
    :param constraint_tree: str, path to a constraint tree (``-g``).
    :param seed: random seed, passed with ``-seed``.
    :param outfile: str, output tree path, default:
        ``[basename(msa)].IQ-TREE.ML.newick``.
    :return: str, path to the saved tree file. The process exits with
        status 1 if IQ-TREE fails, cannot be started, or the tree cannot
        be saved.
    """

    wd = tempfile.mkdtemp(dir=os.path.dirname(os.path.abspath(msa)))
    shutil.copy(msa, os.path.join(wd, 'msa.fa'))
    if model.name:
        if model.type == 'builtin':
            frequency = freq or model.frequency
            frequency = 'FO' if frequency == 'estimate' else 'F'
            invpro = 'I' if (invp or model.invp) else ''
            gamma = gamma or model.gamma
            gamma_term = 'G{}'.format(gamma) if gamma else ''
            cat = cat if cat not in ('None', 'none', None) else 0
            rates = cat or model.rates
            rates_term = 'R{}'.format(rates) if rates else ''
            terms = [model.name.upper(), frequency, invpro, gamma_term,
                     rates_term]
            m = ['-m', '+'.join(term for term in terms if term)]
        else:
            # IQ-TREE runs inside wd, so a relative path would not resolve.
            m = ['-mdef', os.path.abspath(model.name)]
    else:
        m = ['-m', 'TEST']

    info('Inferring ML tree for {} using IQ-TREE.'.format(msa))
    args = [exe, '-s', 'msa.fa', '-nt', '1', '-pre', 'tmf', '-seed', str(seed)]
    args.extend(m)

    if gamma and alpha:
        args.extend(['-a', str(alpha)])
    if invp:
        args.extend(['-i', str(invp)])
    if constraint_tree:
        # Same reason as for the model file: the child's cwd is wd.
        args.extend(['-g', os.path.abspath(constraint_tree)])
    try:
        process = Popen(args, cwd=wd, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
        # communicate() drains both pipes while waiting; wait() alone can
        # block forever once the child fills a pipe buffer.
        _, stderr = process.communicate()
        if process.returncode:
            msg = indent(stderr, prefix='\t')
            error('Inferring ML tree failed for {}\n{}'.format(msa, msg))
            sys.exit(1)
        else:
            tree = outfile if outfile else '{}.IQ-TREE.ML.newick'.format(
                    basename(msa))
            try:
                tree = shutil.copy(os.path.join(wd, 'tmf.treefile'), tree)
                info('Successfully save inferred gene tree to {}.'.format(
                        tree))
            except OSError:
                error('Path of outfile {} is not writeable, saving tree to '
                      'file failed.'.format(tree))
                sys.exit(1)

    except OSError:
        error('Inferring tree failed for {}, executable (exe) {} of '
              'IQ-TREE is invalid.'.format(msa, exe))
        sys.exit(1)
    finally:
        shutil.rmtree(wd)

    return tree
示例#7
0
def _phyml(exe, msa, model, cat, gamma, alpha, freq, invp, start_tree,
           constraint_tree, seed, outfile):
    """
    Infer ML phylogenetic tree using PhyML.

    The FASTA alignment is converted to PHYLIP inside a temporary directory
    created next to ``msa``; PhyML runs there, its ``*_phyml_tree.txt``
    output is copied to the output path and the directory is removed.

    :param exe: str, path to the PhyML executable.
    :param msa: str, path to the alignment file (FASTA).
    :param model: model object; its ``name``, ``type``, ``gamma``,
        ``frequency`` and ``invp`` attributes are read here.
    :param cat: number of free-rate categories; None or the strings
        'None'/'none' disable the option.
    :param gamma: number of gamma categories (``-c``), falls back to
        ``model.gamma``.
    :param alpha: gamma shape (``-a``).
    :param freq: frequency option; 'estimate' selects ``-f e``, anything
        else ``-f m``. Falls back to ``model.frequency``.
    :param invp: proportion of invariable sites (``-v``); estimated
        (``-v e``) when unset but ``model.invp`` is true.
    :param start_tree: str, path to a starting tree (``-u``).
    :param constraint_tree: str, path to a constraint tree; only used
        together with ``start_tree``.
    :param seed: random seed (``--r_seed``).
    :param outfile: str, output tree path, default:
        ``[basename(msa)].PhyML.ML.newick``.
    :return: str, path to the saved tree file. The process exits with
        status 1 if PhyML fails, cannot be started, or the tree cannot
        be saved.
    """

    wd = tempfile.mkdtemp(dir=os.path.dirname(os.path.abspath(msa)))
    alignment = 'temporary.alignment.phylip'
    AlignIO.convert(msa, 'fasta', os.path.join(wd, alignment), 'phylip')

    if model.type == 'builtin':
        m = ['-m', model.name] if model.name else ['-m', 'LG']
    else:
        # PhyML runs inside wd, so a relative path would not resolve.
        m = ['-m', 'custom', '--aa_rate_file', os.path.abspath(model.name)]

    info('Inferring ML tree for {} using PhyML.'.format(msa))
    args = [exe, '-i', alignment, '-d', 'aa', '--r_seed', str(seed), '--quiet']
    args.extend(m)
    cat = cat if cat not in ('None', 'none', None) else 0
    if cat:
        # Popen only accepts strings, an int here raised TypeError.
        # NOTE(review): confirm that --free_rates takes a value in the
        # PhyML version in use.
        args.extend(['--free_rates', str(cat)])
    gamma = gamma or model.gamma
    if gamma:
        args.extend(['-c', str(gamma)])
    if alpha:
        args.extend(['-a', str(alpha)])
    # The option used to be rewritten to 'X'/'F' before this comparison,
    # which made the 'estimate' branch unreachable.
    frequency = freq or model.frequency
    args.extend(['-f', 'e' if frequency == 'estimate' else 'm'])
    if start_tree:
        args.extend(['-u', os.path.abspath(start_tree)])
        if constraint_tree:
            args.extend(['--constraint_file',
                         os.path.abspath(constraint_tree)])
    if invp:
        args.extend(['-v', str(invp)])
    elif model.invp:
        args.extend(['-v', 'e'])

    try:
        process = Popen(args, cwd=wd, stdout=PIPE, stderr=PIPE,
                        universal_newlines=True)
        # communicate() drains both pipes while waiting; wait() alone can
        # block forever once the child fills a pipe buffer.
        stdout, stderr = process.communicate()
        if process.returncode:
            msg = indent(stderr or stdout, prefix='\t')
            error('Tree inferring failed for {}\n{}'.format(msa, msg))
            sys.exit(1)
        else:
            tree = outfile if outfile else '{}.PhyML.ML.newick'.format(
                    basename(msa))
            try:
                out = '{}{}'.format(os.path.join(wd, alignment),
                                    '_phyml_tree.txt')
                tree = shutil.copy(out, tree)
                info('Successfully save inferred ML tree to {}.'.format(
                        tree))
            except OSError:
                error('Path of outfile {} is not writeable, saving tree to '
                      'file failed.'.format(tree))
                sys.exit(1)
    except OSError:
        error('Tree inferring failed for {}, executable (exe) {} of PhyML '
              'is invalid.'.format(msa, exe))
        sys.exit(1)
    finally:
        shutil.rmtree(wd)
    return tree
示例#8
0
 def test_stockholm(self):
     """basename('a.name.stockholm') is expected to give 'a.name'."""
     stem = basename('a.name.stockholm')
     self.assertEqual('a.name', stem)
示例#9
0
 def test_emb(self):
     """basename('a.name.emb') is expected to give 'a.name'."""
     stem = basename('a.name.emb')
     self.assertEqual('a.name', stem)
示例#10
0
 def test_phylip_relaxed(self):
     """basename('a.name.phylip-relaxed') is expected to give 'a.name'."""
     stem = basename('a.name.phylip-relaxed')
     self.assertEqual('a.name', stem)
示例#11
0
 def test_phy_sequential(self):
     """basename('a.name.phylip-sequential') is expected to give 'a.name'."""
     stem = basename('a.name.phylip-sequential')
     self.assertEqual('a.name', stem)
示例#12
0
 def test_phylip(self):
     """basename('a.name.phylip') is expected to give 'a.name'."""
     stem = basename('a.name.phylip')
     self.assertEqual('a.name', stem)
示例#13
0
 def test_clustal(self):
     """basename('a.name.clustal') is expected to give 'a.name'."""
     stem = basename('a.name.clustal')
     self.assertEqual('a.name', stem)
示例#14
0
 def test_aln(self):
     """basename('a.name.aln') is expected to give 'a.name'."""
     stem = basename('a.name.aln')
     self.assertEqual('a.name', stem)
示例#15
0
def imc(sequence,
        tree='',
        aligner='',
        ancestor='',
        simulator='',
        asr_model='JTT',
        exp_model='JTT',
        n=100,
        divergent=True,
        indpairs=True,
        threshold=0.0,
        exp_prob=False,
        verbose=False):
    """
    Identify molecular parallel and convergent changes.
    
    :param sequence: str, path to the sequence data file. Sequence data file
        here covers a wide range of files and formats:
        
        * sequences: raw protein sequence file, needs to be in FASTA format
          and a NEWICK format tree is also required for argument tree.
        * msa: multiple sequence alignment file, needs to be in FASTA format
          and a NEWICK format tree is also required for argument tree.
        * ancestors: reconstructed ancestral states file, needs to be a tsv
          (tab separated) file, the first line needs to start with #TREE,
          the second line needs to be a blank line, and the remaining lines
          need to be tab separated sequence name (or ID) and amino acid
          sequences.
        * simulations: simulated sequences, need to be in a tsv file, the
          first line needs to start with #TREE, the second line needs to be
          a blank line, each dataset needs to be separated by a blank line
          and inside each dataset block, each line should consist of tab
          separated sequence name (or ID) and amino acid sequences.
          
    :param tree: str, NEWICK format tree string or tree file. This needs to
        be set according to argument sequence. If sequence is a raw sequence
        file or MSA file, tree is required for guiding ancestral states
        reconstruction. If sequence is ancestors or simulations, then tree
        is not necessary.
    :param aligner: str, executable of an alignment program.
    :param ancestor: str, executable of an ancestral states reconstruction
        program.
    :param simulator: str, executable of a sequence simulation program.
    :param asr_model: str, model name or model file for ancestral states
        reconstruction, default: JTT.
    :param exp_model: str, model name or model file for estimating expected
        changes based on simulation or replacement probability manipulation,
        default: JTT.
    :param n: int, number of datasets (or duplicates) that should be
        simulated.
    :param divergent: bool, identify divergent changes if True, or only
        identify parallel and convergent changes if False.
        NOTE(review): this argument is not read anywhere in the function
        body -- confirm whether it is still meant to have an effect.
    :param indpairs: bool, only identify changes for independent branch pairs
        if True, or identify changes for all branch pairs if False.
    :param threshold: float, a probability threshold that ranges from 0.0 to
        1.0. If provided, only ancestral states with probability equal or
        larger than the threshold will be used, default: 0.0.
    :param exp_prob: bool, calculate the probability of expected changes if
        set to True and the exp_model contains a probability matrix. Time
        consuming process, be patient for the calculation.
    :param verbose: bool, invoke verbose or silent process mode,
        default: False, silent mode.
    :return: tuple of five items as produced by ``_pc`` plus the sequence
        length: counts of parallel replacements (pars), counts of
        convergent replacements (cons), counts of divergent replacements
        (divs), the list of replacement details, and the length of the
        first tip sequence.

    When any result is non-empty, two files are written next to the loaded
    sequence file: ``[basename].counts.tsv`` and ``[basename].details.tsv``.
    """

    logger.setLevel(logging.INFO if verbose else logging.ERROR)

    if os.path.isfile(sequence):
        sequence = os.path.abspath(sequence)
        wd = os.path.dirname(sequence)
    else:
        error('Invalid sequence {}, sequence is not a file or dose not '
              'exist, exited.'.format(sequence))
        sys.exit(1)

    # basename is taken from the user-supplied file and names the simulation
    # output; basename_more (below) is taken from the file actually loaded
    # and names the result files.
    basename = utilities.basename(sequence)
    rs = _sequencing(sequence, tree, aligner, ancestor, wd, asr_model, verbose)
    tree, rates, records, aps, size, sequence = rs

    basename_more = utilities.basename(sequence)
    pars, cons, divs, details, aup = None, None, None, None, None
    # h1: header of the counts file, h2: header of the details file.
    h1 = ['Category', 'BranchPair']
    h2 = ['Category', 'Position', 'BranchPair', 'R1', 'R2', 'Dataset']

    probs, pi = None, None
    # size == 1 is handled as observed data, anything else as a set of
    # simulated datasets (see the log messages further down).
    if size == 1:
        h1.append('OBS')
        if exp_model:
            if simulator:
                h1.append('EXP')
                h1.extend(['SIM-{}'.format(i + 1) for i in range(n)])

            else:
                if exp_prob:
                    probs, pi = _load_matrix(exp_model)
                    if probs is not None:
                        h1.append('EXP')
    else:
        h1.append('EXP')
        h1.extend(['SIM-{}'.format(i + 1) for i in range(size)])

    # Records whose key does not start with 'NODE' are taken as tips.
    tips = [v[0] for k, v in records.items() if not k.startswith('NODE')]
    length = len(tips[0])
    if size > 1:
        info('Estimating expected changes ... ')
    else:
        info('Identifying observed changes ...')
    tree, pars, cons, divs, details = _pc(tree, rates, records, aps, size,
                                          length, probs, pi, indpairs,
                                          threshold)

    if size == 1 and simulator:
        freq = _frequencing(tips, site=False)
        ts = tree.format('newick').strip()
        out = '{}.{}.tsv'.format(basename, sim._guess(simulator)[0])

        s = sim.sim(simulator,
                    ts,
                    model=exp_model,
                    length=length,
                    freq=freq,
                    n=n,
                    outfile=out,
                    verbose=verbose)

        if s and os.path.isfile(s):
            tree, rates, records, aps, size = _load(s)
            info('Estimating expected changes ... ')
            tree, par, con, div, detail = _pc(tree, rates, records, aps, size,
                                              length, None, None, indpairs,
                                              threshold)

            # Per branch pair: append the mean over the simulated datasets
            # (EXP column) followed by the individual simulated values
            # (SIM-* columns).
            for k, v in par.items():
                pars[k].append(np.mean(v))
                cons[k].append(np.mean(con[k]))
                divs[k].append(np.mean(div[k]))
                pars[k].extend(v), cons[k].extend(con[k])
                divs[k].extend(div[k])
            details.extend(detail)

    if any([pars, cons, divs, details]):
        info('Writing identified parallel and convergent amino acid '
             'replacements to files.')
        counts = ''.join([basename_more, '.counts.tsv'])
        changes = ''.join([basename_more, '.details.tsv'])

        with open(counts, 'w') as o, open(changes, 'w') as c:
            o.write('{}\n'.format('\t'.join(h1)))
            # Formatter for table cells: floats with 4 decimals, everything
            # else via str(). Note that it rebinds the name s used above.
            s = lambda x: '{:.4f}'.format(x) if isinstance(x, float) else str(x
                                                                              )
            o.writelines('P\t{}\t{}\n'.format(k, '\t'.join([s(x) for x in v]))
                         for k, v in pars.items())
            o.writelines('C\t{}\t{}\n'.format(k, '\t'.join([s(x) for x in v]))
                         for k, v in cons.items())
            o.writelines('D\t{}\t{}\n'.format(k, '\t'.join([s(x) for x in v]))
                         for k, v in divs.items())

            c.write('{}\n'.format('\t'.join(h2)))
            c.writelines('{}\t{}\t{}\t{}\t{}\t{}\n'.format(*detail)
                         for detail in details)

    return pars, cons, divs, details, length
示例#16
0
 def test_maf(self):
     """basename('a.name.maf') is expected to give 'a.name'."""
     stem = basename('a.name.maf')
     self.assertEqual('a.name', stem)
示例#17
0
def _fasttree(exe, msa, model, cat, gamma, alpha, freq, invp, start_tree,
              constraint_tree, seed, outfile):
    """
    Infer ML phylogenetic tree using FastTree.

    FastTree is run in the directory of ``msa`` and its standard output is
    written straight to the output tree file; the file is removed again if
    the run fails.

    :param exe: str, path to the FastTree executable.
    :param msa: str, path to the alignment file.
    :param model: model object; its ``name``, ``type`` and ``gamma``
        attributes are read here. Only JTT (default), WAG and LG are
        accepted, custom model files are rejected.
    :param cat: number of rate categories; ``-nocat`` is used when unset
        (None or the strings 'None'/'none' count as unset), ``-cat`` is
        passed for any value other than 20.
    :param gamma: passes ``-gamma`` when this or ``model.gamma`` is set.
    :param start_tree: str, path to a starting tree (``-intree``), ignored
        if it is not an existing file.
    :param seed: random seed (``-seed``).
    :param outfile: str, output tree path, default:
        ``[basename(msa)].FastTree.ML.newick``.
    :param alpha: not used by this function.
    :param freq: not used by this function.
    :param invp: not used by this function.
    :param constraint_tree: not used by this function.
    :return: str, path to the saved tree file. The process exits with
        status 1 on an invalid model or when FastTree fails or cannot be
        started.
    """

    if model.type == 'custom':
        error('Invalid model, FastTree does not accept custom model file '
              '{}.'.format(model))
        sys.exit(1)

    name = model.name if model.name else 'JTT'
    lowered = name.lower()
    if lowered == 'jtt':
        info('Inferring ML tree using JTT model.')
        m = ''
    elif lowered in ('lg', 'wag'):
        # The original compared the bound method ``name.lower`` (never
        # called) against the list, so LG and WAG were always rejected.
        info('Inferring ML tree using {} model.'.format(name.upper()))
        # FastTree's model switches are lower case (-lg, -wag).
        m = '-{}'.format(lowered)
    else:
        error('Invalid model {}, FastTree only accepts JTT, WAG, '
              'and LG model.'.format(name))
        sys.exit(1)

    info('Inferring ML tree for {} using FastTree.'.format(msa))
    # Absolute directory: dirname() of a bare filename is '' and Popen
    # cannot change into it.
    wd = os.path.dirname(os.path.abspath(msa))
    args = [exe, '-quiet', '-nopr', '-seed', str(seed)]
    if start_tree and os.path.isfile(start_tree):
        # The child runs in wd, so hand over a path that resolves there.
        args.extend(['-intree', os.path.abspath(start_tree)])
    if m:
        args.append(m)
    if gamma or model.gamma:
        args.append('-gamma')
    # Same normalisation of "no value" as in the other tree builders.
    cat = cat if cat not in ('None', 'none', None) else 0
    if cat:
        if str(cat) != '20':
            args.extend(['-cat', str(cat)])
    else:
        args.append('-nocat')
    args.append(os.path.basename(msa))

    tree = outfile if outfile else '{}.FastTree.ML.newick'.format(basename(msa))
    try:
        info('Running FastTree using the following command:\n\t'
             '{}'.format(' '.join(args)))
        with open(tree, 'w') as stdout:
            process = Popen(args, cwd=wd, stdout=stdout, stderr=PIPE,
                            universal_newlines=True)
            # communicate() drains stderr while waiting; wait() alone can
            # block forever once the child fills the pipe buffer.
            _, stderr = process.communicate()
        if process.returncode:
            msg = indent(stderr, prefix='\t')
            error('Inferring ML tree failed for {}\n{}'.format(msa, msg))
            if os.path.isfile(tree):
                os.remove(tree)
            sys.exit(1)
        else:
            info('Successfully save inferred ML tree to {}.'.format(tree))
    except OSError:
        error('Inferring ML tree failed for {}, executable (exe) {} of '
              'FastTree is empty or invalid.'.format(msa, exe))
        if os.path.isfile(tree):
            os.remove(tree)
        sys.exit(1)
    return tree
示例#18
0
 def test_xmfa(self):
     """basename('a.name.xmfa') is expected to give 'a.name'."""
     stem = basename('a.name.xmfa')
     self.assertEqual('a.name', stem)
示例#19
0
def _raxml(exe, msa, model, cat, gamma, alpha, freq, invp, start_tree,
           constraint_tree, seed, outfile):
    """
    Infer ML phylogenetic tree using RAxML.

    The alignment is copied as ``msa.fa`` into a temporary directory created
    next to ``msa``; RAxML runs there, ``RAxML_bestTree.RAxML`` is copied to
    the output path and the directory is removed afterwards.

    :param exe: str, path to the RAxML executable.
    :param msa: str, path to the alignment file.
    :param model: model object; its ``name``, ``type``, ``frequency``,
        ``gamma`` and ``invp`` attributes are read here. A non-builtin
        model is passed as a model file with ``-P``.
    :param cat: number of rate categories (``-c``); None or the strings
        'None'/'none' disable the option.
    :param gamma: selects the GAMMA rate model when this or ``model.gamma``
        is set, otherwise CAT is used.
    :param freq: frequency option; 'estimate' appends ``X`` to the model
        string, anything else ``F``. Falls back to ``model.frequency``.
    :param invp: adds ``I`` to the model string when this or ``model.invp``
        is set.
    :param start_tree: str, path to a starting tree (``-t``).
    :param constraint_tree: str, path to a constraint tree (``-r``).
    :param seed: random seed (``-p``).
    :param outfile: str, output tree path, default:
        ``[basename(msa)].RAxML.ML.newick``.
    :param alpha: not used by this function.
    :return: str, path to the saved tree file, or an empty string when
        RAxML exits with a non-zero status. The process exits with status 1
        when RAxML cannot be started or the tree cannot be saved.
    """

    wd = tempfile.mkdtemp(dir=os.path.dirname(os.path.abspath(msa)))
    shutil.copy(msa, os.path.join(wd, 'msa.fa'))
    if model.type == 'builtin':
        frequency = freq or model.frequency
        frequency = 'X' if frequency == 'estimate' else 'F'
        invpro = 'I' if (model.invp or invp) else ''
        rate_model = 'GAMMA' if (gamma or model.gamma) else 'CAT'
        if model.name:
            mm = ''.join(['PROT', rate_model, invpro, model.name.upper(),
                          frequency])
        else:
            # Same component order as above (PROT + rate model + I + matrix);
            # the original put I in front of the rate model here.
            mm = ''.join(['PROT', rate_model, invpro, 'AUTO'])
        m = ['-m', mm]
    else:
        # RAxML runs inside wd, so a relative path would not resolve.
        m = ['-P', os.path.abspath(model.name)]

    info('Inferring ML tree for {} using RAxML.'.format(msa))
    args = [exe, '-s', 'msa.fa', '-n', 'RAxML', '-p', str(seed), '--silent']
    args.extend(m)
    cat = cat if cat not in ('None', 'none', None) else 0
    if cat:
        args.extend(['-c', str(cat)])
    if start_tree:
        args.extend(['-t', os.path.abspath(start_tree)])
    if constraint_tree:
        args.extend(['-r', os.path.abspath(constraint_tree)])
    try:
        process = Popen(args, cwd=wd, stdout=DEVNULL, stderr=PIPE,
                        universal_newlines=True)
        # communicate() drains stderr while waiting; wait() alone can block
        # forever once the child fills the pipe buffer.
        _, stderr = process.communicate()
        if process.returncode:
            msg = indent(stderr, prefix='\t')
            error('Tree inferring failed for {}\n{}'.format(msa, msg))
            # Unlike the other failure paths this one reports the failure
            # to the caller through an empty path instead of exiting.
            tree = ''
        else:
            tree = outfile if outfile else '{}.RAxML.ML.newick'.format(
                    basename(msa))
            try:
                tree = shutil.copy(os.path.join(wd, 'RAxML_bestTree.RAxML'),
                                   tree)
                info('Successfully save inferred ML tree to {}.'.format(
                        tree))
            except OSError:
                error('Path of outfile {} is not writeable, saving tree to '
                      'file failed.'.format(tree))
                sys.exit(1)
    except OSError:
        error('Tree inferring failed for {}, executable (exe) {} is '
              'invalid.'.format(msa, exe))
        sys.exit(1)
    finally:
        shutil.rmtree(wd)

    return tree
示例#20
0
 def test_new(self):
     """basename('a.name.new') is expected to give 'a.name'."""
     stem = basename('a.name.new')
     self.assertEqual('a.name', stem)
示例#21
0
def asr(exe, msa, tree, model, gamma=4, alpha=1.8, freq='',
        outfile='', verbose=False):
    """
    General use function for (marginal) ancestral states reconstruction (ASR).

    :param exe: str, path to the executable of an ASR program.
    :param msa: str, path to the MSA file (must be in FASTA format).
    :param tree: str, path to the tree file (must be in NEWICK format) or a
        NEWICK format tree string (must start with "(" and end with ";").
    :param model: str, substitution model for ASR. Either a path to a model
        file or a valid model string (name of an empirical model plus some
        other options like gamma category and equilibrium frequency option).
        If a model file is in use, the file format of the model file depends
        on the ASR program, see its documentation for details.
    :param gamma: int, the number of categories for the discrete gamma rate
        heterogeneity model. Without setting gamma, RAxML will use CAT model
        instead, while CODEML will use 4 gamma categories.
    :param alpha: float, the shape (alpha) for the gamma rate heterogeneity.
    :param freq: str, the base frequencies of the twenty amino acids.
        Accept empirical, or estimate, where empirical will set frequencies
        use the empirical values associated with the specified substitution
        model, and estimate will use a ML estimate of base frequencies.
    :param outfile: str, path to the output file. Without setting, results
        of ancestral states reconstruction will be saved using the filename
        `[basename].[asrer].tsv`, where basename is the filename of MSA file
        without known FASTA file extension, asrer is the name of the ASR
        program (in lower case). The first line of the file will start with
        '#TREE' and followed by a TAB and then a NEWICK formatted tree
        string, the internal nodes were labeled. The second line of the tsv
        file is intentionally left as a blank line and the rest lines of the
        file are tab separated sequence IDs and amino acid sequences.
    :param verbose: bool, invoke verbose or silent (default) process mode.
    :return: str, path of the ancestral states file. An already existing
        file at that path is reused without running the ASR program.

    The process exits with status 1 when ``msa`` is not a file, ``model`` is
    not a string, or ``exe`` is not recognised as a supported ASR program.

    .. note::

        If a tree (with branch lengths and/or internal nodes labeled) is
        provided, the branch lengths and internal node labels will be
        ignored.

        If the model name combined with Gamma category numbers, i.e. JTT+G4,
        WAG+G8, etc., only the name of the model will be used. For all models
        contain G letter, a discrete Gamma model will be used to account for
        among-site rate variation. If there is a number after letter G,
        the number will be used to define number of categories in CODEML. For
        RAxML, the number of categories will always be set to 4 if G presented.

    """

    level = logging.INFO if verbose else logging.ERROR
    logger.setLevel(level)

    if os.path.isfile(msa):
        msa = os.path.abspath(msa)
    else:
        error('Ancestral reconstruction aborted, msa {} is not a file or '
              'does not exist.'.format(msa))
        sys.exit(1)

    tree = Tree(tree, leave=True)

    if not isinstance(model, str):
        error('Ancestral reconstruction aborted, model {} is not a valid '
              'model name or model file.'.format(model))
        sys.exit(1)

    model = modeling(model)
    asrer, func = _guess(exe)
    if func is None:
        # Without this guard an unrecognised executable surfaced later as
        # "'NoneType' object is not callable".
        error('Invalid or unsupported ASR executable (exe): {}, ancestral '
              'reconstruction aborted.'.format(exe))
        sys.exit(1)

    if not outfile:
        if msa.endswith('.trimmed.fasta'):
            name = msa.replace('.trimmed.fasta', '')
        else:
            name = msa
        outfile = '{}.{}.tsv'.format(basename(name), asrer)

    if os.path.isfile(outfile):
        info('Found pre-existing ancestral state file.')
    else:
        outfile = func(exe, msa, tree, model, gamma, alpha, freq, outfile)
    return outfile
示例#22
0
 def test_name(self):
     """basename('a.name') is expected to give 'a.name' back unchanged."""
     stem = basename('a.name')
     self.assertEqual('a.name', stem)