Пример #1
0
def pxrr(tree_file, output_dir):
    """Unroot the tree returned by treeshrink.

    Runs ``pxrr --unroot`` on ``tree_file`` inside the ``util.cd(output_dir)``
    context and afterwards asks ``util.remove_files`` to delete
    ``phyx.logfile``.

    Args:
        tree_file: Tree file handed to ``pxrr --treef``. It is passed through
            unchanged. NOTE(review): assuming ``util.cd`` changes the working
            directory, a relative path is resolved against ``output_dir`` --
            confirm what callers pass.
        output_dir: Directory entered (via ``util.cd``) while the command
            runs.

    Returns:
        The name computed by ``util.file_name(tree_file)``; the same value is
        given to ``pxrr --outf``.

    Raises:
        subprocess.CalledProcessError: If ``pxrr`` exits with a non-zero
            status.
    """
    unrooted = util.file_name(tree_file)

    # Use an argument list instead of a shell string so that paths containing
    # spaces or shell metacharacters reach pxrr as single, intact arguments.
    cmd = ['pxrr', '--unroot', '--treef', str(tree_file),
           '--outf', str(unrooted)]

    with util.cd(output_dir):
        subprocess.check_call(cmd)
        util.remove_files('phyx.logfile')

    return unrooted
def raxml(fasta_file, output_dir, output_ext, seq_type, cpus, seed):
    """Build a tree with raxml.

    Runs ``raxml`` inside the ``util.cd(output_dir)`` context, renames the
    resulting ``RAxML_bestTree.<name>`` file to ``<name>`` and then asks
    ``util.remove_files`` to delete everything matching ``RAxML_*``.

    Args:
        fasta_file: Sequence file handed to ``raxml -s``. It is passed through
            unchanged. NOTE(review): assuming ``util.cd`` changes the working
            directory, a relative path is resolved against ``output_dir`` --
            confirm what callers pass.
        output_dir: Directory entered (via ``util.cd``) while raxml runs.
        output_ext: Forwarded to ``util.file_name`` to build the tree name.
        seq_type: ``"aa"`` selects the ``PROTCATWAG`` model; any other value
            selects ``GTRCAT``.
        cpus: Value passed to ``raxml -T``.
        seed: Value passed to ``raxml -p``.

    Returns:
        The tree name computed by ``util.file_name(fasta_file, output_ext)``.
        It doubles as the raxml run name (``-n``) and as the final file name.

    Raises:
        subprocess.CalledProcessError: If ``raxml`` exits with a non-zero
            status.
    """
    model = "PROTCATWAG" if seq_type == "aa" else "GTRCAT"
    tree = util.file_name(fasta_file, output_ext)

    # Use an argument list instead of a shell string so that paths containing
    # spaces or shell metacharacters reach raxml as single, intact arguments.
    cmd = [
        'raxml',
        '-T', str(cpus),
        '-p', str(seed),
        '-m', model,
        '-s', str(fasta_file),
        '-n', str(tree),
    ]

    with util.cd(output_dir):
        subprocess.check_call(cmd)
        # Keep only the best tree under the requested name; the remaining
        # RAxML_* outputs are discarded.
        tree_src = 'RAxML_bestTree.' + tree
        move(tree_src, tree)
        util.remove_files('RAxML_*')

    return tree
def pasta(fasta_file, output_dir, output_ext, seq_type, cpus):
    """Align sequences.

    Runs ``run_pasta.py`` inside the ``util.cd(output_dir)`` context, renames
    the ``pastajob.marker001.<base><EXT>`` result to ``<base><output_ext>``
    and then asks ``util.remove_files`` to delete everything matching
    ``pastajob*``.

    Args:
        fasta_file: Input sequence file. For ``seq_type == 'aa'`` the path
            returned by ``bio.adjust_aa_seqs(fasta_file, output_dir)`` is
            given to PASTA instead of the file itself.
        output_dir: Directory entered (via ``util.cd``) while PASTA runs; its
            absolute path is also passed as ``--output-directory``.
        output_ext: Extension appended to the input's base name to form the
            name of the aligned file.
        seq_type: ``'aa'`` selects ``--datatype Protein``; any other value
            selects ``--datatype DNA``.
        cpus: Value passed to ``--num-cpus``.

    Returns:
        The name of the aligned file (base name of ``fasta_file`` without its
        extension, plus ``output_ext``).

    Raises:
        OSError: If ``which('run_pasta.py')`` cannot locate the script.
        subprocess.CalledProcessError: If ``run_pasta.py`` exits with a
            non-zero status.
    """
    # Fail early with a readable message; otherwise a missing script only
    # shows up as an opaque TypeError when the command is assembled.
    pasta_exe = which('run_pasta.py')
    if pasta_exe is None:
        raise OSError("run_pasta.py could not be located by which()")

    in_path = fasta_file
    if seq_type == 'aa':
        in_path = bio.adjust_aa_seqs(fasta_file, output_dir)

    # Use an argument list instead of a hand-quoted shell string so that
    # paths containing quotes, spaces or shell metacharacters stay intact.
    cmd = [
        pasta_exe,
        '--datatype', 'Protein' if seq_type == 'aa' else 'DNA',
        '--num-cpus', str(cpus),
        '--input', str(in_path),
        '--output-directory', abspath(output_dir),
    ]

    with util.cd(output_dir):
        subprocess.check_call(cmd)

        # The temporary name is derived from the original input's base name,
        # not from the (possibly adjusted) in_path.
        base_name = splitext(basename(fasta_file))[0]
        temp_aligned = 'pastajob.marker001.' + base_name + EXT
        aligned = base_name + output_ext
        move(temp_aligned, aligned)

        util.remove_files('pastajob*')

    return aligned
Пример #4
0
def pxclsq(fasta_file, output_dir, output_ext, seq_type, min_occupancy,
           min_len):
    """Filter aligned sequences for occupancy and length.

    Works in two passes inside the ``util.cd(output_dir)`` context:

    1. ``pxclsq`` is run with ``--prop min_occupancy`` and writes an
       intermediate file.
    2. The intermediate file is re-read and only records with at least
       ``min_len`` non-gap (non ``'-'``) characters are written to the final
       file.

    Afterwards ``util.remove_files`` is asked to delete ``phyx.logfile``.
    NOTE: the intermediate pxclsq output is not removed by this function.

    Args:
        fasta_file: Alignment handed to ``pxclsq --seqf``, passed through
            unchanged.
        output_dir: Directory entered (via ``util.cd``) while the work runs.
        output_ext: Extension forwarded to ``util.file_name`` for the final
            file; ``output_ext + EXT_PXCLSQ`` is used for the intermediate
            one.
        seq_type: ``'aa'`` adds the ``--aminoacid`` flag; any other value
            omits it.
        min_occupancy: Value passed to ``pxclsq --prop``.
        min_len: Minimum number of non-gap characters a sequence needs in
            order to be kept.

    Returns:
        The name computed by ``util.file_name(fasta_file, output_ext)``.

    Raises:
        subprocess.CalledProcessError: If ``pxclsq`` exits with a non-zero
            status.
    """
    ext = output_ext + EXT_PXCLSQ
    temp_cleaned = util.file_name(fasta_file, ext)

    # Use an argument list instead of a shell string so that paths containing
    # spaces or shell metacharacters stay intact, and so the optional flag
    # can simply be left out rather than replaced by an empty placeholder.
    cmd = ['pxclsq']
    if seq_type == 'aa':
        cmd.append('--aminoacid')
    cmd.extend([
        '--prop', str(min_occupancy),
        '--seqf', str(fasta_file),
        '--outf', basename(temp_cleaned),
    ])

    cleaned = util.file_name(fasta_file, output_ext)

    with util.cd(output_dir):
        subprocess.check_call(cmd)
        with open(temp_cleaned) as in_file, open(cleaned, 'w') as out_file:
            for header, seq in SimpleFastaParser(in_file):
                # Gap characters do not count towards the sequence length.
                if len(seq.replace('-', '')) >= min_len:
                    bio.write_fasta_record(out_file, header, seq)

        util.remove_files('phyx.logfile')

    return cleaned
def raxml_bs(fasta_file,
             output_dir,
             output_ext,
             seq_type,
             cpus,
             seed,
             replicates=100):
    """Build a bootstrapped tree with raxml.

    Runs ``raxml -f a`` inside the ``util.cd(output_dir)`` context, renames
    the resulting ``RAxML_bipartitions.<name>`` file to ``<name>`` and then
    asks ``util.remove_files`` to delete everything matching ``RAxML_*``.

    Args:
        fasta_file: Sequence file handed to ``raxml -s``. It is passed through
            unchanged. NOTE(review): assuming ``util.cd`` changes the working
            directory, a relative path is resolved against ``output_dir`` --
            confirm what callers pass.
        output_dir: Directory entered (via ``util.cd``) while raxml runs.
        output_ext: Forwarded to ``util.file_name`` to build the tree name.
        seq_type: ``"aa"`` selects the ``PROTCATWAG`` model; any other value
            selects ``GTRCAT``.
        cpus: Value passed to ``raxml -T``.
        seed: Value passed to both ``raxml -x`` and ``raxml -p``.
        replicates: Value passed to ``raxml -#``; defaults to 100.

    Returns:
        The tree name computed by ``util.file_name(fasta_file, output_ext)``.
        It doubles as the raxml run name (``-n``) and as the final file name.

    Raises:
        subprocess.CalledProcessError: If ``raxml`` exits with a non-zero
            status.
    """
    model = "PROTCATWAG" if seq_type == "aa" else "GTRCAT"
    tree = util.file_name(fasta_file, output_ext)

    # Use an argument list instead of a shell string so that paths containing
    # spaces or shell metacharacters reach raxml as single, intact arguments
    # (and so '#' can never be taken for a shell comment).
    cmd = [
        'raxml',
        '-T', str(cpus),
        '-f', 'a',
        '-x', str(seed),
        '-p', str(seed),
        '-m', model,
        '-#', str(replicates),
        '-s', str(fasta_file),
        '-n', str(tree),
    ]

    with util.cd(output_dir):
        subprocess.check_call(cmd)
        # Keep only the bipartitions tree under the requested name; the
        # remaining RAxML_* outputs are discarded.
        tree_src = 'RAxML_bipartitions.' + tree
        move(tree_src, tree)
        util.remove_files('RAxML_*')

    return tree